library(tidyverse) # for data wrangling etc
library(rstanarm) # for fitting models in STAN
library(cmdstanr) # for cmdstan
library(brms) # for fitting models in STAN
library(coda) # for diagnostics
library(bayesplot) # for diagnostics
library(ggmcmc) # for MCMC diagnostics
library(DHARMa) # for residual diagnostics
library(rstan) # for interfacing with STAN
library(emmeans) # for marginal means etc
library(broom) # for tidying outputs
library(tidybayes) # for more tidying outputs
library(ggeffects) # for partial plots
library(broom.mixed) # for summarising models
library(ggeffects) # for partial effects plots
library(bayestestR) # for ROPE
library(see) # for some plots
library(easystats) # for the easystats ecosystem
library(patchwork) # for multiple plots
library(modelsummary) # for data and model summaries
library(car) # for scatterplot matrices
library(ggridges) # for ridge plots
theme_set(theme_grey()) # put the default ggplot theme back
source("helperFunctions.R")

Bayesian GLM Part 4
1 Preparations
Load the necessary libraries
2 Scenario
Loyn (1987) modelled the abundance of forest birds with six predictor variables (patch area, distance to nearest patch, distance to nearest larger patch, grazing intensity, altitude and years since the patch had been isolated).
| ABUND | DIST | LDIST | AREA | GRAZE | ALT | YR.ISOL |
|---|---|---|---|---|---|---|
| .. | .. | .. | .. | .. | .. | .. |
| ABUND | Abundance of forest birds in patch- response variable |
| DIST | Distance to nearest patch - predictor variable |
| LDIST | Distance to nearest larger patch - predictor variable |
| AREA | Size of the patch - predictor variable |
| GRAZE | Grazing intensity (1 to 5, representing light to heavy) - predictor variable |
| ALT | Altitude - predictor variable |
| YR.ISOL | Number of years since the patch was isolated - predictor variable |
The aim of the analysis is to investigate the effects of a range of predictors on the abundance of forest birds.
3 Read in the data
Rows: 56 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (7): ABUND, AREA, YR.ISOL, DIST, LDIST, GRAZE, ALT
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 56
Columns: 7
$ ABUND <dbl> 5.3, 2.0, 1.5, 17.1, 13.8, 14.1, 3.8, 2.2, 3.3, 3.0, 27.6, 1.8…
$ AREA <dbl> 0.1, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.…
$ YR.ISOL <dbl> 1968, 1920, 1900, 1966, 1918, 1965, 1955, 1920, 1965, 1900, 19…
$ DIST <dbl> 39, 234, 104, 66, 246, 234, 467, 284, 156, 311, 66, 93, 39, 40…
$ LDIST <dbl> 39, 234, 311, 66, 246, 285, 467, 1829, 156, 571, 332, 93, 39, …
$ GRAZE <dbl> 2, 5, 5, 3, 5, 3, 5, 5, 4, 5, 3, 5, 2, 1, 5, 5, 3, 3, 3, 2, 2,…
$ ALT <dbl> 160, 60, 140, 160, 140, 130, 90, 60, 130, 130, 210, 160, 210, …
spc_tbl_ [56 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ ABUND : num [1:56] 5.3 2 1.5 17.1 13.8 14.1 3.8 2.2 3.3 3 ...
$ AREA : num [1:56] 0.1 0.5 0.5 1 1 1 1 1 1 1 ...
$ YR.ISOL: num [1:56] 1968 1920 1900 1966 1918 ...
$ DIST : num [1:56] 39 234 104 66 246 234 467 284 156 311 ...
$ LDIST : num [1:56] 39 234 311 66 246 ...
$ GRAZE : num [1:56] 2 5 5 3 5 3 5 5 4 5 ...
$ ALT : num [1:56] 160 60 140 160 140 130 90 60 130 130 ...
- attr(*, "spec")=
.. cols(
.. ABUND = col_double(),
.. AREA = col_double(),
.. YR.ISOL = col_double(),
.. DIST = col_double(),
.. LDIST = col_double(),
.. GRAZE = col_double(),
.. ALT = col_double()
.. )
- attr(*, "problems")=<externalptr>
# A tibble: 6 × 7
ABUND AREA YR.ISOL DIST LDIST GRAZE ALT
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 5.3 0.1 1968 39 39 2 160
2 2 0.5 1920 234 234 5 60
3 1.5 0.5 1900 104 311 5 140
4 17.1 1 1966 66 66 3 160
5 13.8 1 1918 246 246 5 140
6 14.1 1 1965 234 285 3 130
loyn (56 rows and 7 variables, 7 shown)
ID | Name | Type | Missings | Values | N
---+---------+---------+----------+--------------+-----------
1 | ABUND | numeric | 0 (0.0%) | [1.5, 39.6] | 56
---+---------+---------+----------+--------------+-----------
2 | AREA | numeric | 0 (0.0%) | [0.1, 1771] | 56
---+---------+---------+----------+--------------+-----------
3 | YR.ISOL | numeric | 0 (0.0%) | [1890, 1976] | 56
---+---------+---------+----------+--------------+-----------
4 | DIST | numeric | 0 (0.0%) | [26, 1427] | 56
---+---------+---------+----------+--------------+-----------
5 | LDIST | numeric | 0 (0.0%) | [26, 4426] | 56
---+---------+---------+----------+--------------+-----------
6 | GRAZE | numeric | 0 (0.0%) | 1 | 13 (23.2%)
| | | | 2 | 8 (14.3%)
| | | | 3 | 15 (26.8%)
| | | | 4 | 7 (12.5%)
| | | | 5 | 13 (23.2%)
---+---------+---------+----------+--------------+-----------
7 | ALT | numeric | 0 (0.0%) | [60, 260] | 56
-------------------------------------------------------------
| Unique | Missing Pct. | Mean | SD | Min | Median | Max | Histogram | |
|---|---|---|---|---|---|---|---|---|
| ABUND | 54 | 0 | 19.5 | 10.7 | 1.5 | 21.0 | 39.6 | |
| AREA | 36 | 0 | 69.3 | 266.1 | 0.1 | 7.5 | 1771.0 | |
| YR.ISOL | 25 | 0 | 1949.8 | 25.6 | 1890.0 | 1962.5 | 1976.0 | |
| DIST | 31 | 0 | 240.4 | 219.1 | 26.0 | 234.0 | 1427.0 | |
| LDIST | 46 | 0 | 733.3 | 916.1 | 26.0 | 338.5 | 4426.0 | |
| GRAZE | 5 | 0 | 3.0 | 1.5 | 1.0 | 3.0 | 5.0 | |
| ALT | 20 | 0 | 146.2 | 43.5 | 60.0 | 140.0 | 260.0 |
4 Exploratory data analysis
When we explored this analysis from a frequentist perspective, we decided on a log-normal like model. This was a model that was fit against a Gaussian distribution, yet with a log-link. We will replicate that model here in a Bayesian framework.
In the previous exploration of this model, we elected to treat Grazing intensity as a categorical variable - we will again code Grazing intensity as a categorical variable.
Model formula: \[ y_i \sim{} \mathcal{N}(\mu_i, \sigma^2)\\ log(\mu_i) = \boldsymbol{\beta} \bf{X_i}\\ \beta_0 \sim{} \mathcal{N}(3,0.5)\\ \beta_{1-9} \sim{} \mathcal{N}(0,2.5)\\ \sigma \sim{} \mathcal{Gamma}(2,1)\\ OR\\ \sigma \sim{} \mathcal{t}(3,0,2.5) \]
where \(\boldsymbol{\beta}\) is a vector of effects parameters and \(\bf{X}\) is a model matrix representing the additive effects of the scaled versions of distance (ln), distance to the nearest large patch (ln), patch area (ln), grazing intensity, year of isolation and altitude on the abundance of forest birds.
4.1 Scatterplot matrix
To re-acquaint ourselves with the data, we will revisit the scatterplot matrix that we generated prior to the frequentist analysis.
scatterplotMatrix(~ ABUND + DIST + LDIST + AREA + GRAZE + ALT + YR.ISOL,
data = loyn,
diagonal = list(method = "boxplot")
)
We again notice that DIST, LDIST and AREA are skewed, so we will normalise them via a logarithmic transformation.
5 Fit the model
## Frequentist reference fit: a Gaussian GLM with a log link, modelling
## bird abundance against the scaled log-transformed distance and area
## predictors, the categorical grazing factor, and the scaled altitude
## and year-of-isolation predictors.
loyn.glm <- glm(
  ABUND ~ scale(log(DIST)) +
    scale(log(LDIST)) +
    scale(log(AREA)) +
    fGRAZE +
    scale(ALT) +
    scale(YR.ISOL),
  family = gaussian(link = "log"),
  data = loyn
)
summary(loyn.glm)
Call:
glm(formula = ABUND ~ scale(log(DIST)) + scale(log(LDIST)) +
scale(log(AREA)) + fGRAZE + scale(ALT) + scale(YR.ISOL),
family = gaussian(link = "log"), data = loyn)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.092248 0.112856 27.400 < 2e-16 ***
scale(log(DIST)) -0.018067 0.057247 -0.316 0.75373
scale(log(LDIST)) 0.057086 0.059984 0.952 0.34623
scale(log(AREA)) 0.203976 0.059620 3.421 0.00132 **
fGRAZE2 0.039644 0.148978 0.266 0.79134
fGRAZE3 0.019654 0.137752 0.143 0.88717
fGRAZE4 -0.001197 0.156199 -0.008 0.99392
fGRAZE5 -1.121563 0.343631 -3.264 0.00208 **
scale(ALT) -0.003237 0.048607 -0.067 0.94719
scale(YR.ISOL) -0.018246 0.074404 -0.245 0.80737
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 42.40928)
Null deviance: 6337.9 on 55 degrees of freedom
Residual deviance: 1950.8 on 46 degrees of freedom
AIC: 379.76
Number of Fisher Scoring iterations: 6
In rstanarm, the default priors are designed to be weakly informative. They are chosen to provide moderate regularisation (to help prevent over-fitting) and help stabilise the computations.
loyn.rstanarm <- stan_glm(
ABUND ~ scale(log(DIST)) +
scale(log(LDIST)) +
scale(log(AREA)) +
fGRAZE +
scale(ALT) +
scale(YR.ISOL),
data = loyn,
family = gaussian(link = "log"),
iter = 5000, warmup = 2500,
chains = 3, thin = 5, refresh = 0
)
Warning: There were 281 divergent transitions after warmup. See
https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.
Warning: There were 1 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 15. See
https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
Warning: There were 3 chains where the estimated Bayesian Fraction of Missing Information was low. See
https://mc-stan.org/misc/warnings.html#bfmi-low
Warning: Examine the pairs() plot to diagnose sampling problems
Warning: The largest R-hat is 4.38, indicating chains have not mixed.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#r-hat
Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#bulk-ess
Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#tail-ess
Warning: Markov chains did not converge! Do not analyze results!
Conclusions:
- the warning messages suggest that this model has not performed well
- there are a large number of divergent transitions. Divergent transitions occur when the sampler is in a ‘sharp’ part of the posterior and the step length is too large causing the sampler to shoot off the posterior and reject the sample. This leads to inefficient and ineffective MCMC sampling. To alleviate this, we can either:
- review the model itself - it might be misspecified
- adjust the adaptive delta - a parameter that governs the degree of step-length learning that occurs during the warmup stage - the more it learns the less likely the sampler will be to overshoot (although it will take longer to learn)
- review the priors
- the largest R-hat value is large. This suggests that the chains have not converged well
- the effective sample sizes is too low, suggesting that there might be issues with the priors.
- there are a large number of divergent transitions. Divergent transitions occur when the sampler is in a ‘sharp’ part of the posterior and the step length is too large causing the sampler to shoot off the posterior and reject the sample. This leads to inefficient and ineffective MCMC sampling. To alleviate this, we can either:
loyn.rstanarm <- stan_glm(
ABUND ~ scale(log(DIST)) +
scale(log(LDIST)) +
scale(log(AREA)) +
fGRAZE +
scale(ALT) +
scale(YR.ISOL),
data = loyn,
family = gaussian(link = "log"),
iter = 5000, warmup = 2500,
chains = 3, thin = 5, refresh = 0,
adapt_delta = 0.99
)
Warning: There were 383 divergent transitions after warmup. See
https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.
Warning: There were 3 chains where the estimated Bayesian Fraction of Missing Information was low. See
https://mc-stan.org/misc/warnings.html#bfmi-low
Warning: Examine the pairs() plot to diagnose sampling problems
Warning: The largest R-hat is 4.9, indicating chains have not mixed.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#r-hat
Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#bulk-ess
Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#tail-ess
Warning: Markov chains did not converge! Do not analyze results!
There are still a substantial number of divergent transitions. It is possible that the priors are too vague.
Priors for model 'loyn.rstanarm'
------
Intercept (after predictors centered)
Specified prior:
~ normal(location = 0, scale = 2.5)
Adjusted prior:
~ normal(location = 0, scale = 27)
Coefficients
Specified prior:
~ normal(location = [0,0,0,...], scale = [2.5,2.5,2.5,...])
Adjusted prior:
~ normal(location = [0,0,0,...], scale = [26.84,26.84,26.84,...])
Auxiliary (sigma)
Specified prior:
~ exponential(rate = 1)
Adjusted prior:
~ exponential(rate = 0.093)
------
See help('prior_summary.stanreg') for more details
Conclusions:
- the default priors appear to be overly wide. We will instead define our own priors.

### Assessing priors
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Conclusions:
- we see that the range of predictions are extremely wide and the slope could range from strongly negative to strongly positive.
- \(\beta_0\): normal centred at 3 with a standard deviation of 1 (on the log scale, these are the equivalent of 20.1 and 1 respectively)
- mean of 3: since
mean(log(loyn$ABUND)) - sd of 1: since
sd(log(loyn$ABUND))
- mean of 3: since
- \(\beta_1\): normal centred at 0 with a standard deviation of 2.5 (again, consider what this would be on a response scale)
- sd of 1: since
sd(log(loyn$ABUND))/sd(scale(log(loyn$AREA)))(and since each predictor is scaled, it will be the same for each predictor)
- sd of 1: since
- \(\sigma\): half-Cauchy (a heavy-tailed distribution, equivalent to a Student-t with 1 degree of freedom) prior with a scale of 2.
I will also overlay the raw data for comparison.
loyn.rstanarm2 <- stan_glm(
ABUND ~ scale(log(DIST)) +
scale(log(LDIST)) +
scale(log(AREA)) +
fGRAZE +
scale(ALT) +
scale(YR.ISOL),
data = loyn,
family = gaussian(link = "log"),
prior_intercept = normal(3, 1, autoscale = FALSE),
prior = normal(0, 1, autoscale = FALSE),
prior_aux = cauchy(0, 2),
prior_PD = TRUE,
iter = 5000, thin = 5,
chains = 3, warmup = 2500,
refresh = 0
)
Conclusions:
- there are no longer any warning messages.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Now lets refit, conditioning on the data.
## Contrast each marginal prior with its corresponding posterior. If the
## posterior is substantially narrower than the prior, the priors are
## unlikely to have unduly influenced the estimates.
## NOTE(review): loyn.rstanarm3 is assumed to be the final
## data-conditioned fit defined elsewhere in the document - confirm.
posterior_vs_prior(loyn.rstanarm3,
  color_by = "vs", group_by = TRUE,
  facet_args = list(scales = "free_y")
)
Drawing from prior...
Conclusions:
- in each case, the prior is substantially wider than the posterior, suggesting that the posterior is not biased towards the prior.
# ggemmeans(loyn.rstanarm3, ~AREA) |> plot(show_data=TRUE) + scale_y_log10()
## Partial (conditional) effect of AREA with the raw data overlaid.
## x is log10 scaled because AREA is highly skewed; y is log10 scaled to
## match the model's log link. The original chunk applied
## scale_x_log10() twice (hence the "Scale for x is already present"
## warning below) - the second call was evidently meant to be the y axis,
## as in the commented alternative above.
ggpredict(loyn.rstanarm3, terms = "AREA[0:1000]") |>
  plot(jitter = FALSE, show_data = TRUE) +
  scale_x_log10() +
  scale_y_log10()
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for x is already present.
Adding another scale for x, which will replace the existing scale.
Warning in scale_x_log10(): log-10 transformation introduced infinite values.
log-10 transformation introduced infinite values.
## ggpredict(loyn.rstanarm3, terms = "AREA[0:1000]") |>
## plot(jitter = FALSE, residuals=TRUE, log.y = TRUE) + scale_x_log10()
ggpredict(loyn.rstanarm3) |>
plot(show_data = TRUE) |>
wrap_plots() &
scale_y_log10()Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Note that for the second of the above are partial plots (effect of one predictor at the average of the continuous predictors and the first level of the categorical predictor), the raw data are not similarly standardised and thus may appear not to match the trends…
In brms, the default priors are designed to be weakly informative. They are chosen to provide moderate regularisation (to help prevent over-fitting) and help stabilise the computations.
Unlike rstanarm, brms models must be compiled before they start sampling. For most models, the compilation of the stan code takes around 45 seconds.
## brms model formula: bird abundance modelled against the scaled
## (log-transformed where skewed) continuous predictors and the
## categorical grazing intensity, with a Gaussian response distribution
## and a log link.
loyn.form <- bf(
  ABUND ~ scale(log(DIST)) +
    scale(log(LDIST)) +
    scale(log(AREA)) +
    fGRAZE +
    scale(ALT) +
    scale(YR.ISOL),
  family = gaussian(link = "log")
)
loyn.brm <- brm(loyn.form,
data = loyn,
iter = 5000,
warmup = 2500,
chains = 3, cores = 3,
thin = 5,
refresh = 0,
backend = "rstan"
)Compiling Stan program...
Start sampling
Warning: There were 250 divergent transitions after warmup. See
https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.
Warning: There were 249 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
Warning: Examine the pairs() plot to diagnose sampling problems
Warning: The largest R-hat is 1.76, indicating chains have not mixed.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#r-hat
Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#bulk-ess
Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#tail-ess
prior class coef group resp dpar nlpar lb ub source
(flat) b default
(flat) b fGRAZE2 (vectorized)
(flat) b fGRAZE3 (vectorized)
(flat) b fGRAZE4 (vectorized)
(flat) b fGRAZE5 (vectorized)
(flat) b scaleALT (vectorized)
(flat) b scalelogAREA (vectorized)
(flat) b scalelogDIST (vectorized)
(flat) b scalelogLDIST (vectorized)
(flat) b scaleYR.ISOL (vectorized)
student_t(3, 3, 2.5) Intercept default
student_t(3, 0, 11.8) sigma 0 default
## Weakly informative prior for all population-level ("b") coefficients;
## the intercept and sigma keep their brms defaults.
priors <- prior(normal(0, 2.5), class = "b")
## Same model formula as before: log-link Gaussian model of abundance
## against the scaled predictors and the grazing factor.
loyn.form <- bf(
  ABUND ~ scale(log(DIST)) +
    scale(log(LDIST)) +
    scale(log(AREA)) +
    fGRAZE +
    scale(ALT) +
    scale(YR.ISOL),
  family = gaussian(link = "log")
)
loyn.brm1 <- brm(loyn.form,
data = loyn,
prior = priors,
sample_prior = "only",
iter = 5000,
warmup = 2500,
chains = 3,
thin = 5,
refresh = 0,
backend = "rstan"
)Compiling Stan program...
Start sampling
## Individual plots - the following seems to be broken??
## loyn.brm1 |> ggemmeans(~AREA) |> plot(show_data = TRUE) + scale_y_log10()
loyn.brm1 |>
ggemmeans(~AREA) |>
plot(show_data = TRUE) + scale_y_log10()Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
## All effects
loyn.brm1 |>
conditional_effects() |>
plot(points = TRUE, ask = FALSE, plot = FALSE) |>
wrap_plots() &
scale_y_log10()Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning in scale_y_log10(): log-10 transformation introduced infinite values.
log-10 transformation introduced infinite values.
log-10 transformation introduced infinite values.
Conclusions:
- we see that the range of predictions is fairly wide and the slope could range from strongly negative to strongly positive.
- \(\beta_0\): normal centred at 3 with a standard deviation of 1 (on the log scale, these are the equivalent of 20.1 and 1 respectively)
- mean of 3: since
median(log(loyn$ABUND)) - sd of 0.5: since
mad(log(loyn$ABUND))
- mean of 3: since
- \(\beta_1\): normal centred at 0 with a standard deviation of 2.5 (again, consider what this would be on a response scale)
- sd of 1: since
mad(log(loyn$ABUND))/mad(scale(log(loyn$AREA)))(and since each predictor is scaled, it will be the same for each predictor)
- sd of 1: since
- \(\sigma\): half cauchy (flat Gaussian) prior with a scale of 1.
mod.mat <- model.matrix(as.formula(loyn.form), data = loyn)
mad(log(loyn$ABUND)) /
apply(mod.mat, 2, mad) (Intercept) scale(log(DIST)) scale(log(LDIST)) scale(log(AREA))
Inf 0.6280229 0.5626313 0.5009688
fGRAZE2 fGRAZE3 fGRAZE4 fGRAZE5
Inf Inf Inf Inf
scale(ALT) scale(YR.ISOL)
0.5127373 1.3907494
## Data-informed priors (on the log/link scale):
## - Intercept: normal(3.4, 0.1), centred near median(log(loyn$ABUND)).
##   NOTE(review): a scale of 0.1 is very tight - presumably deliberate;
##   confirm against the prior-predictive checks.
## - Coefficients: normal(0, 1.5), of the order of
##   mad(log(ABUND)) / mad(scaled predictor) computed above.
## - sigma: Student-t(3, 0, 1.5); brms truncates sigma priors at zero,
##   so this acts as a half-t.
priors <- prior(normal(3.4, 0.1), class = "Intercept") +
  prior(normal(0, 1.5), class = "b") +
  ## prior(gamma(1, 1), class = 'sigma')
  prior(student_t(3, 0, 1.5), class = "sigma")
loyn.form <- bf(
  ABUND ~ scale(log(DIST)) +
    scale(log(LDIST)) +
    scale(log(AREA)) +
    fGRAZE +
    scale(ALT) +
    scale(YR.ISOL),
  family = gaussian(link = "log")
)
## family = lognormal())
loyn.brm2 <- brm(loyn.form,
data = loyn,
prior = priors,
sample_prior = "only",
iter = 5000,
warmup = 2500,
chains = 3, cores = 3,
thin = 5,
refresh = 0,
backend = "rstan"
)Compiling Stan program...
Start sampling
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
loyn.brm2 |>
conditional_effects() |>
plot(points = TRUE, ask = FALSE, plot = FALSE) |>
## lapply(function(x) x + scale_y_log10()) |>
wrap_plots() &
scale_y_log10()Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning in scale_y_log10(): log-10 transformation introduced infinite values.
[1] "b_Intercept" "b_scalelogDIST" "b_scalelogLDIST" "b_scalelogAREA"
[5] "b_fGRAZE2" "b_fGRAZE3" "b_fGRAZE4" "b_fGRAZE5"
[9] "b_scaleALT" "b_scaleYR.ISOL" "sigma" "Intercept"
[13] "prior_Intercept" "prior_b" "prior_sigma" "lprior"
[17] "lp__" "accept_stat__" "stepsize__" "treedepth__"
[21] "n_leapfrog__" "divergent__" "energy__"
## loyn.brm3 |> hypothesis('Intercept = 0', class = 'b') |> plot
## loyn.brm3 |> hypothesis('Intercept = 0', class = 'prior') |> plot
loyn.brm3 |>
hypothesis("scalelogDIST = 0") |>
plot()$N
[1] 56
$Y
[1] 5.3 2.0 1.5 17.1 13.8 14.1 3.8 2.2 3.3 3.0 27.6 1.8 21.2 14.6 8.0
[16] 3.5 29.0 2.9 24.3 19.4 24.4 5.0 15.8 25.3 19.5 20.9 16.3 18.8 19.9 13.0
[31] 6.8 21.7 27.8 26.8 16.6 30.4 11.5 26.0 25.7 12.7 23.5 24.9 29.0 28.3 28.3
[46] 32.0 37.7 39.6 29.6 31.0 34.4 27.3 30.5 33.0 29.5 30.9
$K
[1] 10
$Kc
[1] 9
$X
Intercept scalelogDIST scalelogLDIST scalelogAREA fGRAZE2 fGRAZE3 fGRAZE4
1 1 -1.51019511 -1.65892116 -2.37802367 1 0 0
2 1 0.37029448 -0.30532977 -1.51765965 0 0 0
3 1 -0.48079400 -0.09042449 -1.51765965 0 0 0
4 1 -0.95804924 -1.26148217 -1.14712103 0 1 0
5 1 0.42278148 -0.26754922 -1.14712103 0 0 0
6 1 0.37029448 -0.15637842 -1.14712103 0 1 0
7 1 1.09552219 0.21669491 -1.14712103 0 0 0
8 1 0.57353754 1.24803687 -1.14712103 0 0 0
9 1 -0.05524976 -0.61163991 -1.14712103 0 0 1
10 1 0.66885367 0.36858640 -1.14712103 0 0 0
11 1 -0.95804924 -0.04106159 -0.77658241 0 1 0
12 1 -0.59812145 -1.00240327 -0.77658241 0 0 0
13 1 -1.51019511 -1.65892116 -0.77658241 1 0 0
14 1 0.93822292 0.10346964 -0.77658241 0 0 0
15 1 0.47682817 -0.22864597 -0.77658241 0 0 0
16 1 -0.24660010 0.43442972 -0.77658241 0 0 0
17 1 -1.93573935 -1.96523129 -0.55983122 0 1 0
18 1 -1.93573935 -1.96523129 -0.55983122 0 1 0
19 1 -1.48362354 -1.63979473 -0.40604380 0 1 0
20 1 0.47682817 -0.22864597 -0.40604380 1 0 0
21 1 0.37029448 0.29645165 -0.40604380 1 0 0
22 1 -1.93573935 1.38927509 -0.40604380 0 0 0
23 1 -1.51019511 -1.65892116 -0.28675700 0 1 0
24 1 0.85682391 0.04487798 -0.28675700 0 0 0
25 1 -0.59812145 -0.33160908 -0.18929260 0 1 0
26 1 -0.03525827 0.79868571 -0.18929260 0 1 0
27 1 0.57722655 0.69705984 -0.10688762 0 1 0
28 1 -0.22265561 -0.73213997 -0.10688762 0 0 1
29 1 0.50481706 -0.20849935 -0.03550518 0 0 1
30 1 0.79284436 1.26397616 0.02745860 0 0 0
31 1 0.75311975 0.29645165 0.08378161 0 1 0
32 1 -1.89613008 -1.93672022 0.13473198 0 0 1
33 1 -0.03525827 -0.59724988 0.18124602 0 0 1
34 1 -0.22265561 -0.73213997 0.18124602 0 0 1
35 1 -0.12477989 -0.66168825 0.22403479 1 0 0
36 1 -0.12477989 0.09591504 0.30053281 0 1 0
37 1 0.90372228 1.51230754 0.36744180 0 0 0
38 1 -1.48362354 1.66778539 0.39799721 1 0 0
39 1 0.50481706 0.99128245 0.42690016 0 0 1
40 1 0.66885367 1.53439756 0.50527059 0 0 0
41 1 1.35327186 0.40222517 0.59457340 0 0 0
42 1 1.25762760 0.59446805 0.65294853 0 1 0
43 1 0.24667868 -0.39430941 0.70557205 0 0 0
44 1 -0.95804924 -0.01204503 0.73798041 0 0 0
45 1 0.57722655 -0.51197475 0.82485885 1 0 0
46 1 -0.59812145 -1.00240327 0.87580921 0 1 0
47 1 0.47682817 0.98837574 0.92232325 0 1 0
48 1 2.26783778 1.12640241 0.93334579 0 0 0
49 1 0.92772762 1.07832552 0.94414564 0 0 0
50 1 1.09552219 0.21669491 1.01418997 0 0 0
51 1 -1.51019511 0.29645165 1.29286187 1 0 0
52 1 0.93822292 1.91565164 1.35582564 0 0 0
53 1 1.09552219 1.39201101 1.47113789 0 0 0
54 1 -0.12477989 -0.07123733 1.50961306 0 0 0
55 1 0.75311975 1.00336997 2.53095496 0 0 0
56 1 0.73743154 -0.04106159 2.85111978 0 0 0
fGRAZE5 scaleALT scaleYR.ISOL
1 0 0.31588146 0.7134084
2 1 -1.98143824 -1.1629534
3 1 -0.14358248 -1.9447708
4 0 0.31588146 0.6352266
5 1 -0.14358248 -1.2411351
6 0 -0.37331445 0.5961358
7 1 -1.29224233 0.2052271
8 1 -1.98143824 -1.1629534
9 0 -0.37331445 0.5961358
10 1 -0.37331445 -1.9447708
11 0 1.46454131 -0.9284082
12 1 0.31588146 -2.3356795
13 0 1.46454131 0.9088627
14 0 1.46454131 0.8697719
15 1 -0.60304642 -1.9447708
16 1 -0.02871650 -1.9447708
17 0 -0.83277839 0.4788632
18 0 -0.14358248 0.5961358
19 0 1.00507737 0.4006814
20 0 -1.29224233 0.1270453
21 0 1.69427328 0.9088627
22 1 -0.60304642 -1.0456808
23 0 -0.37331445 0.5961358
24 0 -1.06251036 0.6743175
25 0 0.54561343 -2.3356795
26 0 0.08614949 0.4006814
27 0 -0.37331445 0.5961358
28 0 1.46454131 0.4006814
29 0 -0.60304642 0.9088627
30 1 -1.29224233 -1.5538621
31 0 -0.83277839 0.4788632
32 0 0.66047941 0.4006814
33 0 -0.83277839 0.5179540
34 0 1.23480934 0.4006814
35 0 1.00507737 0.7134084
36 0 -0.60304642 0.6352266
37 1 -1.06251036 -1.1629534
38 0 1.00507737 0.6352266
39 0 0.08614949 0.9088627
40 1 -1.29224233 -1.2411351
41 0 -0.14358248 0.5179540
42 0 -0.37331445 0.5961358
43 0 1.00507737 0.9479536
44 0 -0.83277839 0.5961358
45 0 -0.60304642 0.4788632
46 0 1.00507737 0.4006814
47 0 -0.60304642 -0.8502264
48 0 0.77534540 0.8697719
49 0 -0.14358248 0.6743175
50 0 0.43074744 0.5179540
51 0 0.66047941 1.0261353
52 0 -1.75170627 0.5570449
53 0 0.31588146 0.5570449
54 0 1.00507737 -0.3811360
55 0 1.00507737 0.7915901
56 0 2.61320116 -0.6547721
attr(,"assign")
[1] 0 1 2 3 4 4 4 4 5 6
attr(,"contrasts")
attr(,"contrasts")$fGRAZE
2 3 4 5
1 0 0 0 0
2 1 0 0 0
3 0 1 0 0
4 0 0 1 0
5 0 0 0 1
$prior_only
[1] 0
attr(,"class")
[1] "standata" "list"
// generated with brms 2.22.0
functions {
}
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
int<lower=1> K; // number of population-level effects
matrix[N, K] X; // population-level design matrix
int<lower=1> Kc; // number of population-level effects after centering
int prior_only; // should the likelihood be ignored?
}
transformed data {
matrix[N, Kc] Xc; // centered version of X without an intercept
vector[Kc] means_X; // column means of X before centering
for (i in 2:K) {
means_X[i - 1] = mean(X[, i]);
Xc[, i - 1] = X[, i] - means_X[i - 1];
}
}
parameters {
vector[Kc] b; // regression coefficients
real Intercept; // temporary intercept for centered predictors
real<lower=0> sigma; // dispersion parameter
}
transformed parameters {
real lprior = 0; // prior contributions to the log posterior
lprior += normal_lpdf(b | 0, 1.5);
lprior += normal_lpdf(Intercept | 3.4, 0.1);
lprior += student_t_lpdf(sigma | 3, 0, 1.5)
- 1 * student_t_lccdf(0 | 3, 0, 1.5);
}
model {
// likelihood including constants
if (!prior_only) {
// initialize linear predictor term
vector[N] mu = rep_vector(0.0, N);
mu += Intercept + Xc * b;
mu = exp(mu);
target += normal_lpdf(Y | mu, sigma);
}
// priors including constants
target += lprior;
}
generated quantities {
// actual population-level intercept
real b_Intercept = Intercept - dot_product(means_X, b);
// additionally sample draws from priors
real prior_b = normal_rng(0,1.5);
real prior_Intercept = normal_rng(3.4,0.1);
real prior_sigma = student_t_rng(3,0,1.5);
// use rejection sampling for truncated priors
while (prior_sigma < 0) {
prior_sigma = student_t_rng(3,0,1.5);
}
}
6 MCMC sampling diagnostics
The bayesplot package offers a range of MCMC diagnostics as well as Posterior Probability Checks (PPC), all of which have a convenient plot() interface. Lets start with the MCMC diagnostics.
See list of available diagnostics by name
bayesplot MCMC module:
mcmc_acf
mcmc_acf_bar
mcmc_areas
mcmc_areas_ridges
mcmc_combo
mcmc_dens
mcmc_dens_chains
mcmc_dens_overlay
mcmc_hex
mcmc_hist
mcmc_hist_by_chain
mcmc_intervals
mcmc_neff
mcmc_neff_hist
mcmc_nuts_acceptance
mcmc_nuts_divergence
mcmc_nuts_energy
mcmc_nuts_stepsize
mcmc_nuts_treedepth
mcmc_pairs
mcmc_parcoord
mcmc_rank_ecdf
mcmc_rank_hist
mcmc_rank_overlay
mcmc_recover_hist
mcmc_recover_intervals
mcmc_recover_scatter
mcmc_rhat
mcmc_rhat_hist
mcmc_scatter
mcmc_trace
mcmc_trace_highlight
mcmc_violin
Of these, we will focus on:
- mcmc_trace: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different shade of blue, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
The chains appear well mixed and very similar
- acf (auto-correlation function): plots the auto-correlation between successive MCMC sample lags for each parameter and each chain
There is no evidence of auto-correlation in the MCMC samples
- Rhat: Rhat is a measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
All Rhat values are below 1.05, suggesting the chains have converged.
neff (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).
If the ratios are low, tightening the priors may help.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Ratios all very high.
The rstan package offers a range of MCMC diagnostics. Lets start with the MCMC diagnostics.
Of these, we will focus on:
- stan_trace: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different colour, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
The chains appear well mixed and very similar
- stan_acf (auto-correlation function): plots the auto-correlation between successive MCMC sample lags for each parameter and each chain
There is no evidence of auto-correlation in the MCMC samples
- stan_rhat: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
All Rhat values are below 1.05, suggesting the chains have converged.
stan_ess (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).
If the ratios are low, tightening the priors may help.
Ratios all very high.
The ggmcmc package also has a set of MCMC diagnostic functions. Lets start with the MCMC diagnostics.
Of these, we will focus on:
- ggs_traceplot: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different colour, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
The chains appear well mixed and very similar
- ggs_autocorrelation (autocorrelation function): plots the autocorrelation between successive MCMC sample lags for each parameter and each chain
There is no evidence of autocorrelation in the MCMC samples
- stan_rhat: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
All Rhat values are below 1.05, suggesting the chains have converged.
stan_ess (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).
If the ratios are low, tightening the priors may help.
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
dplyr 1.1.0.
ℹ Please use `reframe()` instead.
ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
always returns an ungrouped data frame and adjust accordingly.
ℹ The deprecated feature was likely used in the ggmcmc package.
Please report the issue at <https://github.com/xfim/ggmcmc/issues/>.
Ratios all very high.
The bayesplot package offers a range of MCMC diagnostics as well as Posterior Probability Checks (PPC), all of which have a convenient plot() interface. Lets start with the MCMC diagnostics.
See list of available diagnostics by name
bayesplot MCMC module:
mcmc_acf
mcmc_acf_bar
mcmc_areas
mcmc_areas_ridges
mcmc_combo
mcmc_dens
mcmc_dens_chains
mcmc_dens_overlay
mcmc_hex
mcmc_hist
mcmc_hist_by_chain
mcmc_intervals
mcmc_neff
mcmc_neff_hist
mcmc_nuts_acceptance
mcmc_nuts_divergence
mcmc_nuts_energy
mcmc_nuts_stepsize
mcmc_nuts_treedepth
mcmc_pairs
mcmc_parcoord
mcmc_rank_ecdf
mcmc_rank_hist
mcmc_rank_overlay
mcmc_recover_hist
mcmc_recover_intervals
mcmc_recover_scatter
mcmc_rhat
mcmc_rhat_hist
mcmc_scatter
mcmc_trace
mcmc_trace_highlight
mcmc_violin
Of these, we will focus on:
- mcmc_trace: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different shade of blue, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
The chains appear well mixed and very similar
- acf (autocorrelation function): plots the autocorrelation between successive MCMC sample lags for each parameter and each chain
There is no evidence of autocorrelation in the MCMC samples
- Rhat: Rhat is a measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
All Rhat values are below 1.05, suggesting the chains have converged.
neff (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).
If the ratios are low, tightening the priors may help.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Ratios all very high.
The rstan package offers a range of MCMC diagnostics. Lets start with the MCMC diagnostics.
Of these, we will focus on:
- stan_trace: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different colour, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
The chains appear well mixed and very similar
- stan_acf (autocorrelation function): plots the autocorrelation between successive MCMC sample lags for each parameter and each chain
There is no evidence of autocorrelation in the MCMC samples
- stan_rhat: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
All Rhat values are below 1.05, suggesting the chains have converged.
stan_ess (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).
If the ratios are low, tightening the priors may help.
Ratios all very high.
The ggmcmc package also has a set of MCMC diagnostic functions. Lets start with the MCMC diagnostics.
Of these, we will focus on:
- ggs_traceplot: this plots the estimates of each parameter over the post-warmup length of each MCMC chain. Each chain is plotted in a different colour, with each parameter in its own facet. Ideally, each trace should just look like noise without any discernible drift and each of the traces for a specific parameter should look the same (i.e, should not be displaced above or below any other trace for that parameter).
The chains appear well mixed and very similar
- ggs_autocorrelation (autocorrelation function): plots the autocorrelation between successive MCMC sample lags for each parameter and each chain
There is no evidence of autocorrelation in the MCMC samples
- stan_rhat: Rhat is a scale reduction factor measure of convergence between the chains. The closer the values are to 1, the more the chains have converged. Values greater than 1.05 indicate a lack of convergence. There will be an Rhat value for each parameter estimated.
All Rhat values are below 1.05, suggesting the chains have converged.
stan_ess (number of effective samples): the ratio of the number of effective samples (those not rejected by the sampler) to the number of samples provides an indication of the effectiveness (and efficiency) of the MCMC sampler. Ratios that are less than 0.5 for a parameter suggest that the sampler spent considerable time in difficult areas of the sampling domain and rejected more than half of the samples (replacing them with the previous effective sample).
If the ratios are low, tightening the priors may help.
Ratios all very high.
7 Model validation
Posterior predictive checks provide additional diagnostics about the fit of the model. Specifically, they provide a comparison between predictions drawn from the model and the observed data used to train the model.
See list of available diagnostics by name
bayesplot PPC module:
ppc_bars
ppc_bars_grouped
ppc_boxplot
ppc_dens
ppc_dens_overlay
ppc_dens_overlay_grouped
ppc_ecdf_overlay
ppc_ecdf_overlay_grouped
ppc_error_binned
ppc_error_hist
ppc_error_hist_grouped
ppc_error_scatter
ppc_error_scatter_avg
ppc_error_scatter_avg_grouped
ppc_error_scatter_avg_vs_x
ppc_freqpoly
ppc_freqpoly_grouped
ppc_hist
ppc_intervals
ppc_intervals_grouped
ppc_km_overlay
ppc_km_overlay_grouped
ppc_loo_intervals
ppc_loo_pit_ecdf
ppc_loo_pit_overlay
ppc_loo_pit_qq
ppc_loo_ribbon
ppc_pit_ecdf
ppc_pit_ecdf_grouped
ppc_ribbon
ppc_ribbon_grouped
ppc_rootogram
ppc_scatter
ppc_scatter_avg
ppc_scatter_avg_grouped
ppc_stat
ppc_stat_2d
ppc_stat_freqpoly
ppc_stat_freqpoly_grouped
ppc_stat_grouped
ppc_violin_grouped
- dens_overlay: plots the density distribution of the observed data (black line) overlayed on top of 50 density distributions generated from draws from the model (light blue). Ideally, the 50 realisations should be roughly consistent with the observed data.
The model draws appear to deviate from the observed data.
- error_scatter_avg: this plots the observed values against the average residuals. Similar to a residual plot, we do not want to see any patterns in this plot. There is some pattern remaining in these residuals.
The predictive error seems to be related to the predictor - the model performs poorest at higher patch areas.
- error_scatter_avg_vs_x: this is similar to a regular residual plot and as such should be interpreted as such. Again, this is not interpretable for binary data.
- intervals: plots the observed data overlayed on top of posterior predictions associated with each level of the predictor. Ideally, the observed data should all fall within the predictive intervals.
The modelled predictions seem to underestimate the uncertainty with increasing patch area.
- ribbon: this is just an alternative way of expressing the above plot.
The shinystan package allows the full suite of MCMC diagnostics and posterior predictive checks to be accessed via a web interface.
DHARMa residuals provide very useful diagnostics. Unfortunately, we cannot directly use the simulateResiduals() function to generate the simulated residuals. However, if we are willing to calculate some of the components ourselves, we can still obtain the simulated residuals from the fitted stan model.
We need to supply:
- simulated (predicted) responses associated with each observation.
- observed values
- fitted (predicted) responses (averaged) associated with each observation
## Draw 250 posterior predictive simulations per observation from the
## fitted rstanarm model (rows = draws, columns = observations).
preds <- loyn.rstanarm3 |>
  posterior_predict(ndraws = 250, summary = FALSE)
## Build a DHARMa residuals object: createDHARMa() expects observations
## in rows, so the simulation matrix is transposed.
loyn.resids <- createDHARMa(
  observedResponse = loyn$ABUND,
  simulatedResponse = t(preds),
  fittedPredictedResponse = apply(preds, 2, median),
  integerResponse = FALSE
)
plot(loyn.resids)
Conclusions:
- the simulated residuals suggest a general lack of fit due to over dispersion and outliers
- perhaps we should explore a negative binomial model
Posterior predictive checks provide additional diagnostics about the fit of the model. Specifically, they provide a comparison between predictions drawn from the model and the observed data used to train the model.
See list of available diagnostics by name
bayesplot PPC module:
ppc_bars
ppc_bars_grouped
ppc_boxplot
ppc_dens
ppc_dens_overlay
ppc_dens_overlay_grouped
ppc_ecdf_overlay
ppc_ecdf_overlay_grouped
ppc_error_binned
ppc_error_hist
ppc_error_hist_grouped
ppc_error_scatter
ppc_error_scatter_avg
ppc_error_scatter_avg_grouped
ppc_error_scatter_avg_vs_x
ppc_freqpoly
ppc_freqpoly_grouped
ppc_hist
ppc_intervals
ppc_intervals_grouped
ppc_km_overlay
ppc_km_overlay_grouped
ppc_loo_intervals
ppc_loo_pit_ecdf
ppc_loo_pit_overlay
ppc_loo_pit_qq
ppc_loo_ribbon
ppc_pit_ecdf
ppc_pit_ecdf_grouped
ppc_ribbon
ppc_ribbon_grouped
ppc_rootogram
ppc_scatter
ppc_scatter_avg
ppc_scatter_avg_grouped
ppc_stat
ppc_stat_2d
ppc_stat_freqpoly
ppc_stat_freqpoly_grouped
ppc_stat_grouped
ppc_violin_grouped
- dens_overlay: plots the density distribution of the observed data (black line) overlayed on top of 50 density distributions generated from draws from the model (light blue). Ideally, the 50 realisations should be roughly consistent with the observed data.
The model draws appear to deviate from the observed data.
- error_scatter_avg: this plots the observed values against the average residuals. Similar to a residual plot, we do not want to see any patterns in this plot. There is some pattern remaining in these residuals.
Using all posterior draws for ppc type 'error_scatter_avg' by default.
The predictive error seems to be related to the predictor - the model performs poorest at higher patch areas.
- error_scatter_avg_vs_x: this is similar to a regular residual plot and as such should be interpreted as such. Again, this is not interpretable for binary data.
Using all posterior draws for ppc type 'error_scatter_avg_vs_x' by default.
- intervals: plots the observed data overlayed on top of posterior predictions associated with each level of the predictor. Ideally, the observed data should all fall within the predictive intervals.
Using all posterior draws for ppc type 'intervals' by default.
The modelled predictions seem to underestimate the uncertainty with increasing patch area.
- ribbon: this is just an alternative way of expressing the above plot.
Using all posterior draws for ppc type 'ribbon' by default.
The shinystan package allows the full suite of MCMC diagnostics and posterior predictive checks to be accessed via a web interface.
DHARMa residuals provide very useful diagnostics. Unfortunately, we cannot directly use the simulateResiduals() function to generate the simulated residuals. However, if we are willing to calculate some of the components ourselves, we can still obtain the simulated residuals from the fitted stan model.
We need to supply:
- simulated (predicted) responses associated with each observation.
- observed values
- fitted (predicted) responses (averaged) associated with each observation
## Simulate 250 posterior predictive draws per observation from the
## fitted brms model (rows = draws, columns = observations).
preds <- posterior_predict(loyn.brm3, ndraws = 250, summary = FALSE)
## Construct DHARMa simulated residuals; the draw matrix is transposed
## because createDHARMa() expects observations in rows.
loyn.resids <- createDHARMa(
  observedResponse = loyn$ABUND,
  simulatedResponse = t(preds),
  fittedPredictedResponse = apply(preds, 2, median),
  integerResponse = FALSE
)
loyn.resids |> plot()
loyn.resids <- make_brms_dharma_res(loyn.brm3, integerResponse = FALSE)
wrap_elements(~ testUniformity(loyn.resids)) +
wrap_elements(~ plotResiduals(loyn.resids, form = factor(rep(1, nrow(loyn))))) +
wrap_elements(~ plotResiduals(loyn.resids, quantreg = TRUE)) +
  wrap_elements(~ testDispersion(loyn.resids))
Conclusions:
- the simulated residuals suggest a general lack of fit due to over-dispersion and outliers
- perhaps we should explore a negative binomial model
8 Partial effects plots
## Partial effect of AREA: summarise posterior expected values per
## observation and overlay the raw data.
## NOTE: fitted_draws() is deprecated in tidybayes (see the warning
## printed below); epred_draws() returns the expectation of the
## posterior predictive in the `.epred` column.
loyn.rstanarm3 |>
  epred_draws(newdata = loyn) |>
  median_hdci(.epred) |>
  ggplot(aes(x = AREA, y = .epred)) +
  geom_ribbon(aes(ymin = .lower, ymax = .upper), fill = "blue", alpha = 0.3) +
  geom_line() +
  geom_point(data = loyn, aes(y = ABUND, x = AREA)) +
  scale_y_log10()
Warning: `fitted_draws` and `add_fitted_draws` are deprecated as their names were confusing.
- Use [add_]epred_draws() to get the expectation of the posterior predictive.
- Use [add_]linpred_draws() to get the distribution of the linear predictor.
- For example, you used [add_]fitted_draws(..., scale = "response"), which
means you most likely want [add_]epred_draws(...).
NOTE: When updating to the new functions, note that the `model` parameter is now
named `object` and the `n` parameter is now named `ndraws`.
loyn.brm3 |>
conditional_effects() |>
plot(ask = FALSE, points = TRUE, plot = FALSE) |>
  wrap_plots()
loyn.brm3 |>
conditional_effects() |>
plot(ask = FALSE, points = TRUE, plot = FALSE) |>
wrap_plots() &
  scale_y_log10()
g <- loyn.brm3 |>
conditional_effects() |>
plot(ask = FALSE, points = TRUE, plot = FALSE)
library(patchwork)
length(g)
[1] 6
(g[[1]] + scale_x_log10()) +
(g[[2]] + scale_x_log10()) +
(g[[3]] + scale_x_log10()) +
g[[4]] +
g[[5]] +
  g[[6]]
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning in scale_y_log10(): log-10 transformation introduced infinite values.
log-10 transformation introduced infinite values.
log-10 transformation introduced infinite values.
log-10 transformation introduced infinite values.
log-10 transformation introduced infinite values.
log-10 transformation introduced infinite values.
log-10 transformation introduced infinite values.
This will be slow …
loyn.brm3 |>
ggemmeans("AREA[0:1000]") |>
plot(show_data = TRUE) |>
wrap_plots() &
scale_y_log10() &
  scale_x_log10()
Data points may overlap. Use the `jitter` argument to add some amount of
random variation to the location of data points and avoid overplotting.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
loyn.brm3 |>
epred_draws(newdata = loyn) |>
median_hdci(.epred) |>
ggplot(aes(x = AREA, y = .epred, colour = fGRAZE, fill = fGRAZE)) +
geom_ribbon(aes(ymin = .lower, ymax = .upper), colour = NA, alpha = 0.3) +
geom_line() +
geom_point(data = loyn, aes(y = ABUND, x = AREA)) +
scale_y_log10() +
  scale_x_log10()
9 Model investigation
The summary() method generates simple summaries (mean, standard deviation as well as 10, 50 and 90 percentiles).
Model Info:
function: stan_glm
family: gaussian [log]
formula: ABUND ~ scale(log(DIST)) + scale(log(LDIST)) + scale(log(AREA)) +
fGRAZE + scale(ALT) + scale(YR.ISOL)
algorithm: sampling
sample: 1500 (posterior sample size)
priors: see help('prior_summary')
observations: 56
predictors: 10
Estimates:
mean sd 10% 50% 90%
(Intercept) 3.0 0.1 2.9 3.1 3.2
scale(log(DIST)) 0.0 0.1 -0.1 0.0 0.1
scale(log(LDIST)) 0.1 0.1 0.0 0.1 0.1
scale(log(AREA)) 0.2 0.1 0.1 0.2 0.3
fGRAZE2 0.0 0.2 -0.2 0.0 0.2
fGRAZE3 0.0 0.1 -0.1 0.0 0.2
fGRAZE4 0.0 0.2 -0.2 0.0 0.2
fGRAZE5 -1.1 0.3 -1.5 -1.0 -0.6
scale(ALT) 0.0 0.1 -0.1 0.0 0.1
scale(YR.ISOL) 0.0 0.1 -0.1 0.0 0.1
sigma 6.6 0.7 5.8 6.5 7.5
Fit Diagnostics:
mean sd 10% 50% 90%
mean_PPD 19.3 1.3 17.7 19.4 20.9
The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
MCMC diagnostics
mcse Rhat n_eff
(Intercept) 0.0 1.0 1431
scale(log(DIST)) 0.0 1.0 1510
scale(log(LDIST)) 0.0 1.0 1173
scale(log(AREA)) 0.0 1.0 1328
fGRAZE2 0.0 1.0 1118
fGRAZE3 0.0 1.0 1528
fGRAZE4 0.0 1.0 1588
fGRAZE5 0.0 1.0 1444
scale(ALT) 0.0 1.0 1396
scale(YR.ISOL) 0.0 1.0 1405
sigma 0.0 1.0 1601
mean_PPD 0.0 1.0 1576
log-posterior 0.1 1.0 1476
For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
Conclusions:
- in the Model Info, we are informed that the total MCMC posterior sample size is 1500 and that there were 56 raw observations.
- the estimated intercept (expected bird abundance when grazing intensity is equal to 1 and all of the continuous predictors are at their respective averages) is 3.05. This is the mean of the posterior distribution for this parameter. If we back-transform this to the response scale, this becomes 21.03.
- the estimated slope associated with
DIST (rate at which the abundance of birds changes per 1 unit change in log-transformed Distance to nearest patch holding other continuous predictors constant and grazing intensity at level 1), is -0.01 (mean) or -0.02 (median) with a standard deviation of 0.1. The 90% credibility intervals indicate that we are 90% confident that the slope is between -0.11 and 0.09 - e.g. there is no evidence of a significant trend.
- associated with each of the continuous predictors is a partial slope. Each partial slope is the rate of change between bird abundance and the associated predictor (on the log scale due to the link and based on 1 unit change in the predictor on the scale of the predictor). For example, for every one unit change in centred log patch Area, bird abundance is expected to increase by (log) 0.21.
- if we back transform this slope (by exponentiation), we get a partial slope for centred log Area of 1.24. This is interpreted as - for every 1 unit increase in (scaled log) Area, the bird abundance is expected to increase 1.24 fold. That is, there is a 23.96 percent increase per 1 unit increase in centred log Area.
- Rhat and number of effective samples for each parameter are also provided as MCMC diagnostics and all look good.
loyn.rstanarm3$stanfit |>
summarise_draws(
median,
HDInterval::hdi,
rhat, length, ess_bulk, ess_tail
  )
# A tibble: 13 × 8
variable median lower upper rhat length ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 3.05e+0 2.82e+0 3.25e+0 1.00 1500 1314. 1292.
2 scale(log(DIST)) -1.55e-2 -1.35e-1 9.69e-2 1.00 1500 1528. 1330.
3 scale(log(LDIST)) 5.53e-2 -6.55e-2 1.73e-1 1.00 1500 1226. 1377.
4 scale(log(AREA)) 2.14e-1 9.80e-2 3.38e-1 1.00 1500 1352. 1495.
5 fGRAZE2 3.94e-2 -2.53e-1 3.40e-1 1.00 1500 1121. 1199.
6 fGRAZE3 3.51e-2 -2.17e-1 3.24e-1 1.000 1500 1505. 1539.
7 fGRAZE4 1.39e-2 -3.03e-1 3.21e-1 0.999 1500 1576. 1498.
8 fGRAZE5 -1.05e+0 -1.78e+0 -4.27e-1 1.000 1500 1454. 1382.
9 scale(ALT) 2.61e-4 -9.91e-2 9.96e-2 1.00 1500 1402. 1465.
10 scale(YR.ISOL) -1.94e-3 -1.38e-1 1.56e-1 1.00 1500 1412. 1314.
11 sigma 6.51e+0 5.32e+0 7.87e+0 0.999 1500 1619. 1538.
12 mean_PPD 1.94e+1 1.68e+1 2.17e+1 1.00 1500 1573. 1418.
13 log-posterior -1.96e+2 -2.01e+2 -1.92e+2 1.00 1500 1454. 1498.
We can also alter the CI level.
loyn.rstanarm3$stanfit |>
summarise_draws(
median,
~ HDInterval::hdi(.x, credMass = 0.9),
rhat, length, ess_bulk, ess_tail
  )
# A tibble: 13 × 8
variable median lower upper rhat length ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 3.05e+0 2.86e+0 3.22e+0 1.00 1500 1314. 1292.
2 scale(log(DIST)) -1.55e-2 -1.06e-1 8.62e-2 1.00 1500 1528. 1330.
3 scale(log(LDIST)) 5.53e-2 -4.03e-2 1.62e-1 1.00 1500 1226. 1377.
4 scale(log(AREA)) 2.14e-1 1.16e-1 3.15e-1 1.00 1500 1352. 1495.
5 fGRAZE2 3.94e-2 -2.31e-1 2.67e-1 1.00 1500 1121. 1199.
6 fGRAZE3 3.51e-2 -2.06e-1 2.50e-1 1.000 1500 1505. 1539.
7 fGRAZE4 1.39e-2 -2.78e-1 2.61e-1 0.999 1500 1576. 1498.
8 fGRAZE5 -1.05e+0 -1.60e+0 -5.00e-1 1.000 1500 1454. 1382.
9 scale(ALT) 2.61e-4 -8.06e-2 8.20e-2 1.00 1500 1402. 1465.
10 scale(YR.ISOL) -1.94e-3 -1.23e-1 1.25e-1 1.00 1500 1412. 1314.
11 sigma 6.51e+0 5.42e+0 7.57e+0 0.999 1500 1619. 1538.
12 mean_PPD 1.94e+1 1.72e+1 2.13e+1 1.00 1500 1573. 1418.
13 log-posterior -1.96e+2 -2.00e+2 -1.93e+2 1.00 1500 1454. 1498.
And on a ratio scale
loyn.rstanarm3$stanfit |>
summarise_draws(
~ median(exp(.x)),
~ HDInterval::hdi(exp(.x)),
rhat, length, ess_bulk, ess_tail
  )
# A tibble: 13 × 8
variable `~median(exp(.x))` lower upper rhat length ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 (Interce… 2.11e+ 1 1.68e+ 1 2.59e+ 1 1.00 1500 1314. 1292.
2 scale(lo… 9.85e- 1 8.74e- 1 1.10e+ 0 1.00 1500 1528. 1330.
3 scale(lo… 1.06e+ 0 9.37e- 1 1.19e+ 0 1.00 1500 1226. 1377.
4 scale(lo… 1.24e+ 0 1.10e+ 0 1.40e+ 0 1.00 1500 1352. 1495.
5 fGRAZE2 1.04e+ 0 7.40e- 1 1.37e+ 0 1.00 1500 1121. 1199.
6 fGRAZE3 1.04e+ 0 7.78e- 1 1.35e+ 0 1.000 1500 1505. 1539.
7 fGRAZE4 1.01e+ 0 7.37e- 1 1.38e+ 0 0.999 1500 1576. 1498.
8 fGRAZE5 3.51e- 1 1.52e- 1 6.06e- 1 1.000 1500 1454. 1382.
9 scale(AL… 1.00e+ 0 9.06e- 1 1.10e+ 0 1.00 1500 1402. 1465.
10 scale(YR… 9.98e- 1 8.58e- 1 1.15e+ 0 1.00 1500 1412. 1314.
11 sigma 6.74e+ 2 1.34e+ 2 2.38e+ 3 0.999 1500 1619. 1538.
12 mean_PPD 2.54e+ 8 2.38e+ 6 1.93e+ 9 1.00 1500 1573. 1418.
13 log-post… 6.70e-86 2.71e-90 1.61e-84 1.00 1500 1454. 1498.
# A tibble: 1,500 × 20
.chain .iteration .draw `(Intercept)` `scale(log(DIST))` `scale(log(LDIST))`
<int> <int> <int> <dbl> <dbl> <dbl>
1 1 1 1 3.08 -0.0174 0.121
2 1 2 2 3.09 -0.0160 0.0146
3 1 3 3 3.03 -0.0785 0.131
4 1 4 4 3.21 -0.00790 0.134
5 1 5 5 3.12 -0.0146 0.127
6 1 6 6 3.00 -0.0490 0.0986
7 1 7 7 3.05 -0.00366 0.0971
8 1 8 8 3.24 -0.147 0.143
9 1 9 9 3.04 -0.0501 0.0807
10 1 10 10 3.07 0.00675 -0.0319
# ℹ 1,490 more rows
# ℹ 14 more variables: `scale(log(AREA))` <dbl>, fGRAZE2 <dbl>, fGRAZE3 <dbl>,
# fGRAZE4 <dbl>, fGRAZE5 <dbl>, `scale(ALT)` <dbl>, `scale(YR.ISOL)` <dbl>,
# sigma <dbl>, accept_stat__ <dbl>, stepsize__ <dbl>, treedepth__ <dbl>,
# n_leapfrog__ <dbl>, divergent__ <dbl>, energy__ <dbl>
# A draws_df: 500 iterations, 3 chains, and 13 variables
(Intercept) scale(log(DIST)) scale(log(LDIST)) scale(log(AREA)) fGRAZE2
1 3.1 -0.0174 0.121 0.163 -0.045
2 3.1 -0.0160 0.015 0.230 0.137
3 3.0 -0.0785 0.131 0.177 -0.114
4 3.2 -0.0079 0.134 0.042 -0.113
5 3.1 -0.0146 0.127 0.193 0.034
6 3.0 -0.0490 0.099 0.222 0.109
7 3.1 -0.0037 0.097 0.258 0.131
8 3.2 -0.1472 0.143 0.127 -0.238
9 3.0 -0.0501 0.081 0.264 0.108
10 3.1 0.0068 -0.032 0.247 -0.059
fGRAZE3 fGRAZE4 fGRAZE5
1 -0.0293 -0.0068 -1.03
2 -0.0458 0.0464 -0.95
3 0.0950 -0.1209 -1.48
4 0.0044 -0.1113 -1.88
5 -0.0602 0.0266 -1.05
6 0.0711 0.0781 -1.05
7 -0.1016 0.0114 -1.27
8 -0.2376 -0.0266 -1.87
9 0.0044 -0.0143 -0.82
10 -0.0496 -0.2269 -0.48
# ... with 1490 more draws, and 5 more variables
# ... hidden reserved variables {'.chain', '.iteration', '.draw'}
## Equivalent summary via the posterior package: convert the stanfit draws to
## a draws_df, then report median, 95% HDI, Rhat and bulk ESS (link scale).
loyn.rstanarm3$stanfit |>
as_draws_df() |>
summarise_draws(
"median",
~ HDInterval::hdi(.x),
"rhat",
"ess_bulk"
)# A tibble: 13 × 6
variable median lower upper rhat ess_bulk
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 3.05 2.82 3.25 1.00 1314.
2 scale(log(DIST)) -0.0155 -0.135 0.0969 1.00 1528.
3 scale(log(LDIST)) 0.0553 -0.0655 0.173 1.00 1226.
4 scale(log(AREA)) 0.214 0.0980 0.338 1.00 1352.
5 fGRAZE2 0.0394 -0.253 0.340 1.00 1121.
6 fGRAZE3 0.0351 -0.217 0.324 1.000 1505.
7 fGRAZE4 0.0139 -0.303 0.321 0.999 1576.
8 fGRAZE5 -1.05 -1.78 -0.427 1.000 1454.
9 scale(ALT) 0.000261 -0.0991 0.0996 1.00 1402.
10 scale(YR.ISOL) -0.00194 -0.138 0.156 1.00 1412.
11 sigma 6.51 5.32 7.87 0.999 1619.
12 mean_PPD 19.4 16.8 21.7 1.00 1573.
13 log-posterior -196. -201. -192. 1.00 1454.
## broom.mixed::tidyMCMC alternative: medians with HPD intervals plus Rhat and
## ESS in a tidy tibble (std.error is the posterior standard deviation).
tidyMCMC(loyn.rstanarm3$stanfit,
estimate.method = "median", conf.int = TRUE,
conf.method = "HPDinterval", rhat = TRUE, ess = TRUE
)# A tibble: 13 × 7
term estimate std.error conf.low conf.high rhat ess
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 (Intercept) 3.05 0.112 2.82 3.25 1.00 1431
2 scale(log(DIST)) -0.0147 0.0596 -0.135 0.0969 0.999 1510
3 scale(log(LDIST)) 0.0533 0.0621 -0.0655 0.173 1.00 1173
4 scale(log(AREA)) 0.215 0.0611 0.0980 0.338 0.999 1328
5 fGRAZE2 0.0370 0.151 -0.253 0.340 1.00 1118
6 fGRAZE3 0.0392 0.138 -0.217 0.324 1.000 1528
7 fGRAZE4 0.00520 0.164 -0.303 0.321 0.999 1588
8 fGRAZE5 -1.07 0.342 -1.78 -0.427 0.999 1444
9 scale(ALT) -0.000419 0.0508 -0.0991 0.0996 1.00 1396
10 scale(YR.ISOL) 0.00347 0.0762 -0.138 0.156 0.999 1405
11 sigma 6.56 0.664 5.32 7.87 0.999 1601
12 mean_PPD 19.3 1.25 16.8 21.7 1.00 1576
13 log-posterior -196. 2.50 -201. -192. 1.00 1476
Conclusions:
- the estimated intercept (expected bird abundance when grazing intensity is equal to 1 and all of the continuous predictors are at their respective averages) is 3.05. This is the median of the posterior distribution for this parameter. If we back-transform this to the response scale, this becomes 21.1.
- the estimated slope associated with
DIST (rate at which the abundance of birds changes per 1 unit change in log-transformed Distance to nearest patch holding other continuous predictors constant and grazing intensity at level 1) is -0.01 (median) with a standard deviation of 0.06. The 95% credibility intervals indicate that we are 95% confident that the slope is between -0.14 and 0.10 - e.g. there is no evidence of a significant trend.
- associated with each of the continuous predictors is a partial slope. Each partial slope is the rate of change between bird abundance and the associated predictor (on the log scale due to the link and based on 1 unit change in the predictor on the scale of the predictor). For example, for every one unit change in centred log patch Area, bird abundance is expected to increase by (log) 0.21.
- if we back transform this slope (by exponentiation), we get a partial slope for centred log Area of 1.24. This is interpreted as - for every 1 unit increase in (scaled log) Area, the bird abundance is expected to increase 1.24 fold. That is, there is a 24 percent increase per 1 unit increase in centred log Area.
- Rhat and number of effective samples for each parameter are also provided as MCMC diagnostics and all look good.
[1] "(Intercept)" "scale(log(DIST))" "scale(log(LDIST))"
[4] "scale(log(AREA))" "fGRAZE2" "fGRAZE3"
[7] "fGRAZE4" "fGRAZE5" "scale(ALT)"
[10] "scale(YR.ISOL)" "sigma" "accept_stat__"
[13] "stepsize__" "treedepth__" "n_leapfrog__"
[16] "divergent__" "energy__"
## Collect intercept and slope draws into long (tidy) format; the regex picks
## out the model coefficients and excludes sampler diagnostic quantities.
loyn.draw <- loyn.rstanarm3 |> gather_draws(`.Intercept.*|.*AREA.*|.*DIST.*|.*GRAZE.*|.*ALT.*|.*YR.*`, regex = TRUE)
loyn.draw# A tibble: 15,000 × 5
# Groups: .variable [10]
.chain .iteration .draw .variable .value
<int> <int> <int> <chr> <dbl>
1 1 1 1 (Intercept) 3.08
2 1 2 2 (Intercept) 3.09
3 1 3 3 (Intercept) 3.03
4 1 4 4 (Intercept) 3.21
5 1 5 5 (Intercept) 3.12
6 1 6 6 (Intercept) 3.00
7 1 7 7 (Intercept) 3.05
8 1 8 8 (Intercept) 3.24
9 1 9 9 (Intercept) 3.04
10 1 10 10 (Intercept) 3.07
# ℹ 14,990 more rows
## Halfeye / ridge displays of the (link-scale) coefficient posteriors.
loyn.rstanarm3 |>
gather_draws(`.*AREA.*|.*DIST.*|.*GRAZE.*|.*ALT.*|.*YR.*`, regex = TRUE) |>
ggplot() +
stat_halfeye(aes(x = .value, y = .variable)) +
facet_wrap(~.variable, scales = "free")loyn.rstanarm3 |>
## NOTE(review): .value here is still on the log (link) scale, so the "no
## effect" reference is 0, not 1 - this vline probably belongs at 0. Verify.
gather_draws(`.*AREA.*|.*DIST.*|.*GRAZE.*|.*ALT.*|.*YR.*`, regex = TRUE) |>
ggplot() +
stat_halfeye(aes(x = .value, y = .variable)) +
geom_vline(xintercept = 1, linetype = "dashed")loyn.rstanarm3 |>
## Ridge-plot version of the same posteriors, reference line at 0 (no effect).
gather_draws(`.*AREA.*|.*DIST.*|.*GRAZE.*|.*ALT.*|.*YR.*`, regex = TRUE) |>
ggplot() +
geom_density_ridges(aes(x = .value, y = .variable), alpha = 0.4) +
geom_vline(xintercept = 0, linetype = "dashed")Picking joint bandwidth of 0.0249
## Or on a fractional scale
## Same ridges after exponentiation (ratio scale), with 2.5/97.5% quantile
## lines and a gradient fill; dashed line at 1 marks "no effect".
## NOTE(review): stat(x) is deprecated since ggplot2 3.4.0 (see warning
## below); after_stat(x) is the recommended replacement.
loyn.rstanarm3 |>
gather_draws(`.*AREA.*|.*DIST.*|.*GRAZE.*|.*ALT.*|.*YR.*`, regex = TRUE) |>
ggplot() +
geom_density_ridges_gradient(
aes(
x = exp(.value),
y = .variable,
fill = stat(x)
),
alpha = 0.4, colour = "white",
quantile_lines = TRUE,
quantiles = c(0.025, 0.975)
) +
geom_vline(xintercept = 1, linetype = "dashed") +
scale_x_continuous(trans = scales::log2_trans()) +
scale_fill_viridis_c(option = "C")Warning: `stat(x)` was deprecated in ggplot2 3.4.0.
ℹ Please use `after_stat(x)` instead.
Picking joint bandwidth of 0.0359
## Wide-format draws: one column per coefficient, one row per posterior draw.
loyn.rstanarm3 |> spread_draws(`.Intercept.*|.*DIST.*|.*AREA.*|.*GRAZE.*|.*ALT.*|.*YR.*`, regex = TRUE)# A tibble: 1,500 × 13
.chain .iteration .draw `(Intercept)` `scale(log(DIST))` `scale(log(LDIST))`
<int> <int> <int> <dbl> <dbl> <dbl>
1 1 1 1 3.08 -0.0174 0.121
2 1 2 2 3.09 -0.0160 0.0146
3 1 3 3 3.03 -0.0785 0.131
4 1 4 4 3.21 -0.00790 0.134
5 1 5 5 3.12 -0.0146 0.127
6 1 6 6 3.00 -0.0490 0.0986
7 1 7 7 3.05 -0.00366 0.0971
8 1 8 8 3.24 -0.147 0.143
9 1 9 9 3.04 -0.0501 0.0807
10 1 10 10 3.07 0.00675 -0.0319
# ℹ 1,490 more rows
# ℹ 7 more variables: `scale(log(AREA))` <dbl>, fGRAZE2 <dbl>, fGRAZE3 <dbl>,
# fGRAZE4 <dbl>, fGRAZE5 <dbl>, `scale(ALT)` <dbl>, `scale(YR.ISOL)` <dbl>
Warning: `as.tibble()` was deprecated in tibble 2.0.0.
ℹ Please use `as_tibble()` instead.
ℹ The signature and semantics have changed, see `?as_tibble`.
Warning: Method 'posterior_samples' is deprecated. Please see ?as_draws for
recommended alternatives.
# A tibble: 1,500 × 11
`(Intercept)` `scale(log(DIST))` `scale(log(LDIST))` `scale(log(AREA))`
<dbl> <dbl> <dbl> <dbl>
1 3.08 -0.0174 0.121 0.163
2 3.09 -0.0160 0.0146 0.230
3 3.03 -0.0785 0.131 0.177
4 3.21 -0.00790 0.134 0.0423
5 3.12 -0.0146 0.127 0.193
6 3.00 -0.0490 0.0986 0.222
7 3.05 -0.00366 0.0971 0.258
8 3.24 -0.147 0.143 0.127
9 3.04 -0.0501 0.0807 0.264
10 3.07 0.00675 -0.0319 0.247
# ℹ 1,490 more rows
# ℹ 7 more variables: fGRAZE2 <dbl>, fGRAZE3 <dbl>, fGRAZE4 <dbl>,
# fGRAZE5 <dbl>, `scale(ALT)` <dbl>, `scale(YR.ISOL)` <dbl>, sigma <dbl>
The summary() method generates simple summaries (mean, standard deviation as well as 10, 50 and 90 percentiles).
Family: gaussian
Links: mu = log; sigma = identity
Formula: ABUND ~ scale(log(DIST)) + scale(log(LDIST)) + scale(log(AREA)) + fGRAZE + scale(ALT) + scale(YR.ISOL)
Data: loyn (Number of observations: 56)
Draws: 3 chains, each with iter = 5000; warmup = 2500; thin = 5;
total post-warmup draws = 1500
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 3.13 0.10 2.93 3.33 1.00 1462 1409
scalelogDIST -0.01 0.05 -0.11 0.11 1.00 1430 1208
scalelogLDIST 0.04 0.06 -0.07 0.15 1.00 1459 1421
scalelogAREA 0.19 0.05 0.08 0.29 1.00 1472 1589
fGRAZE2 0.02 0.14 -0.27 0.30 1.00 1346 1384
fGRAZE3 0.02 0.13 -0.23 0.27 1.00 1457 1410
fGRAZE4 -0.01 0.15 -0.32 0.26 1.00 1498 1497
fGRAZE5 -0.75 0.26 -1.26 -0.24 1.00 1399 1423
scaleALT 0.00 0.05 -0.09 0.10 1.00 1478 1354
scaleYR.ISOL -0.00 0.07 -0.13 0.14 1.00 1589 1500
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma 6.68 0.74 5.45 8.34 1.00 1427 1445
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
Conclusions:
- in the Model Info, we are informed that the total MCMC posterior sample size is 1500 and that there were 56 raw observations.
- the estimated intercept (expected bird abundance when grazing intensity is equal to 1 and all of the continuous predictors are at their respective averages) is 3.13. This is the mean of the posterior distribution for this parameter. If we back-transform this to the response scale, this becomes 22.96.
- the estimated slope associated with
DIST (rate at which the abundance of birds changes per 1 unit change in log-transformed Distance to nearest patch holding other continuous predictors constant and grazing intensity at level 1) is -0.01 (mean) with an estimated error (standard deviation) of 0.05. The 95% credibility intervals indicate that we are 95% confident that the slope is between -0.11 and 0.11 - e.g. there is no evidence of a significant trend.
- associated with each of the continuous predictors is a partial slope. Each partial slope is the rate of change between bird abundance and the associated predictor (on the log scale due to the link and based on 1 unit change in the predictor on the scale of the predictor). For example, for every one unit change in centred log patch Area, bird abundance is expected to increase by (log) 0.19.
- if we back transform this slope (by exponentiation), we get a partial slope for centred log Area of 1.21. This is interpreted as - for every 1 unit increase in (scaled log) Area, the bird abundance is expected to increase 1.21 fold. That is, there is a 20.58 percent increase per 1 unit increase in centred log Area.
- Rhat and number of effective samples for each parameter are also provided as MCMC diagnostics and all look good.
# A tibble: 1,500 × 26
.chain .iteration .draw b_Intercept b_scalelogDIST b_scalelogLDIST
<int> <int> <int> <dbl> <dbl> <dbl>
1 1 1 1 3.34 0.0149 0.0522
2 1 2 2 3.30 0.00367 -0.0606
3 1 3 3 3.17 -0.0803 0.124
4 1 4 4 3.13 0.0278 0.0564
5 1 5 5 3.07 0.0844 -0.00819
6 1 6 6 3.27 -0.0823 0.0615
7 1 7 7 3.08 -0.0335 0.0585
8 1 8 8 2.86 0.0160 -0.0661
9 1 9 9 3.08 -0.151 0.111
10 1 10 10 3.07 0.0115 0.0223
# ℹ 1,490 more rows
# ℹ 20 more variables: b_scalelogAREA <dbl>, b_fGRAZE2 <dbl>, b_fGRAZE3 <dbl>,
# b_fGRAZE4 <dbl>, b_fGRAZE5 <dbl>, b_scaleALT <dbl>, b_scaleYR.ISOL <dbl>,
# sigma <dbl>, Intercept <dbl>, prior_Intercept <dbl>, prior_b <dbl>,
# prior_sigma <dbl>, lprior <dbl>, lp__ <dbl>, accept_stat__ <dbl>,
# stepsize__ <dbl>, treedepth__ <dbl>, n_leapfrog__ <dbl>, divergent__ <dbl>,
# energy__ <dbl>
## brms version: extract the b_ coefficients, exponentiate, and summarise on
## the ratio scale (median + 95% HDI) with convergence diagnostics.
loyn.brm3 |>
tidy_draws() |>
dplyr::select(starts_with("b_")) |>
exp() |>
summarise_draws(
median,
HDInterval::hdi,
rhat,
length,
ess_bulk, ess_tail
)# A tibble: 10 × 8
variable median lower upper rhat length ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 b_Intercept 23.0 18.6 27.7 1.00 1500 1435. 1300.
2 b_scalelogDIST 0.988 0.886 1.10 1.000 1500 1425. 1152.
3 b_scalelogLDIST 1.04 0.933 1.16 1.000 1500 1442. 1414.
4 b_scalelogAREA 1.21 1.08 1.34 1.000 1500 1467. 1580.
5 b_fGRAZE2 1.03 0.754 1.33 0.999 1500 1339. 1378.
6 b_fGRAZE3 1.02 0.771 1.28 1.00 1500 1437. 1385.
7 b_fGRAZE4 0.991 0.723 1.29 1.00 1500 1491. 1395.
8 b_fGRAZE5 0.468 0.265 0.741 1.00 1500 1387. 1404.
9 b_scaleALT 1.00 0.909 1.10 1.00 1500 1467. 1316.
10 b_scaleYR.ISOL 0.994 0.867 1.13 1.00 1500 1573. 1491.
## As above, but via spread_draws and including sigma.
## NOTE(review): exp(sigma) (row 11 of the output) is not an interpretable
## quantity; only the coefficient rows should be read on the ratio scale.
loyn.brm3 |>
spread_draws(`^b_.*|sigma`, regex = TRUE) |>
exp() |>
summarise_draws(
median,
HDInterval::hdi,
rhat,
length,
ess_bulk, ess_tail
)# A tibble: 11 × 8
variable median lower upper rhat length ess_bulk ess_tail
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 b_Intercept 23.0 18.6 27.7 0.999 1500 1462. 1409.
2 b_scalelogDIST 0.988 0.886 1.10 1.00 1500 1430. 1208.
3 b_scalelogLDIST 1.04 0.933 1.16 1.000 1500 1459. 1421.
4 b_scalelogAREA 1.21 1.08 1.34 0.999 1500 1472. 1589.
5 b_fGRAZE2 1.03 0.754 1.33 1.00 1500 1346. 1384.
6 b_fGRAZE3 1.02 0.771 1.28 1.00 1500 1457. 1410.
7 b_fGRAZE4 0.991 0.723 1.29 1.00 1500 1498. 1497.
8 b_fGRAZE5 0.468 0.265 0.741 1.00 1500 1399. 1423.
9 b_scaleALT 1.00 0.909 1.10 1.00 1500 1478. 1354.
10 b_scaleYR.ISOL 0.994 0.867 1.13 1.00 1500 1590. 1500.
11 sigma 728. 113. 2859. 1.000 1500 1427. 1445.
# A draws_df: 500 iterations, 3 chains, and 17 variables
b_Intercept b_scalelogDIST b_scalelogLDIST b_scalelogAREA b_fGRAZE2
1 3.3 0.0149 0.0522 0.063 0.016
2 3.3 0.0037 -0.0606 0.187 -0.362
3 3.2 -0.0803 0.1235 0.200 0.079
4 3.1 0.0278 0.0564 0.169 0.098
5 3.1 0.0844 -0.0082 0.218 0.058
6 3.3 -0.0823 0.0615 0.153 -0.035
7 3.1 -0.0335 0.0585 0.193 0.025
8 2.9 0.0160 -0.0661 0.304 0.249
9 3.1 -0.1512 0.1106 0.259 0.126
10 3.1 0.0115 0.0223 0.263 0.159
b_fGRAZE3 b_fGRAZE4 b_fGRAZE5
1 -0.313 -0.3272 -1.05
2 -0.278 -0.3141 -0.76
3 0.076 0.1654 -1.08
4 0.041 -0.0066 -0.75
5 0.068 -0.1216 -0.63
6 -0.073 -0.2212 -0.91
7 -0.089 0.1757 -0.76
8 0.266 0.1890 -0.61
9 0.075 0.1766 -0.56
10 0.109 0.1147 -0.68
# ... with 1490 more draws, and 9 more variables
# ... hidden reserved variables {'.chain', '.iteration', '.draw'}
## posterior-package route: as_draws_df, keep coefficients and sigma, apply
## exp() column-wise, then summarise. The class-drop warning is expected:
## mutate() strips the draws_df metadata before summarise_draws() runs.
loyn.brm3 |>
as_draws_df() |>
dplyr::select(matches("^b_.*|^sigma$")) |>
mutate(across(everything(), exp)) |>
summarise_draws(
"median",
~ HDInterval::hdi(.x),
"rhat",
"ess_bulk"
)Warning: Dropping 'draws_df' class as required metadata was removed.
# A tibble: 11 × 6
variable median lower upper rhat ess_bulk
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 b_Intercept 23.0 18.6 27.7 1.00 1435.
2 b_scalelogDIST 0.988 0.886 1.10 1.000 1425.
3 b_scalelogLDIST 1.04 0.933 1.16 1.000 1442.
4 b_scalelogAREA 1.21 1.08 1.34 1.000 1466.
5 b_fGRAZE2 1.03 0.754 1.33 0.999 1339.
6 b_fGRAZE3 1.02 0.771 1.28 1.00 1437.
7 b_fGRAZE4 0.991 0.723 1.29 1.00 1491.
8 b_fGRAZE5 0.468 0.265 0.741 1.00 1387.
9 b_scaleALT 1.00 0.909 1.10 1.00 1467.
10 b_scaleYR.ISOL 0.994 0.867 1.13 1.00 1573.
11 sigma 728. 113. 2859. 0.999 1426.
## tidyMCMC on the underlying stanfit: medians, HPD intervals, Rhat and ESS
## (link scale). Also returns prior-sample and lprior bookkeeping rows.
loyn.brm3$fit |>
tidyMCMC(
estimate.method = "median",
conf.int = TRUE, conf.method = "HPDinterval",
rhat = TRUE, ess = TRUE
)# A tibble: 16 × 7
term estimate std.error conf.low conf.high rhat ess
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 b_Intercept 3.13 0.102 2.94 3.33 0.999 1439
2 b_scalelogDIST -0.0108 0.0547 -0.110 0.108 0.999 1426
3 b_scalelogLDIST 0.0437 0.0589 -0.0689 0.149 0.999 1452
4 b_scalelogAREA 0.187 0.0544 0.0798 0.292 0.998 1468
5 b_fGRAZE2 0.0243 0.144 -0.236 0.328 1.00 1358
6 b_fGRAZE3 0.0219 0.128 -0.207 0.285 1.00 1454
7 b_fGRAZE4 -0.0122 0.151 -0.324 0.253 0.999 1477
8 b_fGRAZE5 -0.754 0.258 -1.27 -0.250 1.00 1382
9 b_scaleALT 0.00269 0.0484 -0.0950 0.0921 0.999 1477
10 b_scaleYR.ISOL -0.00287 0.0699 -0.143 0.126 1.00 1589
11 sigma 6.68 0.743 5.37 8.15 1.000 1445
12 Intercept 2.97 0.0466 2.87 3.05 0.999 1509
13 prior_Intercept 3.40 0.100 3.22 3.61 0.999 1405
14 prior_b 0.0138 1.49 -2.70 3.07 1.00 1538
15 prior_sigma 1.67 1.84 0.00264 5.27 1.00 1431
16 lprior -25.0 1.97 -28.9 -21.4 0.999 1528
Conclusions:
- the estimated intercept (expected bird abundance when grazing intensity is equal to 1 and all of the continuous predictors are at their respective averages) is 3.13. This is the median of the posterior distribution for this parameter. If we back-transform this to the response scale, this becomes 22.9.
- the estimated slope associated with
DIST (rate at which the abundance of birds changes per 1 unit change in log-transformed Distance to nearest patch holding other continuous predictors constant and grazing intensity at level 1) is -0.01 (median) with a standard deviation of 0.05. The 95% credibility intervals indicate that we are 95% confident that the slope is between -0.11 and 0.11 - e.g. there is no evidence of a significant trend.
- associated with each of the continuous predictors is a partial slope. Each partial slope is the rate of change between bird abundance and the associated predictor (on the log scale due to the link and based on 1 unit change in the predictor on the scale of the predictor). For example, for every one unit change in centred log patch Area, bird abundance is expected to increase by (log) 0.19.
- if we back transform this slope (by exponentiation), we get a partial slope for centred log Area of 1.21. This is interpreted as - for every 1 unit increase in (scaled log) Area, the bird abundance is expected to increase 1.21 fold. That is, there is a 20.6 percent increase per 1 unit increase in centred log Area.
- Rhat and number of effective samples for each parameter are also provided as MCMC diagnostics and all look good.
[1] "b_Intercept" "b_scalelogDIST" "b_scalelogLDIST" "b_scalelogAREA"
[5] "b_fGRAZE2" "b_fGRAZE3" "b_fGRAZE4" "b_fGRAZE5"
[9] "b_scaleALT" "b_scaleYR.ISOL" "sigma" "Intercept"
[13] "prior_Intercept" "prior_b" "prior_sigma" "lprior"
[17] "lp__" "accept_stat__" "stepsize__" "treedepth__"
[21] "n_leapfrog__" "divergent__" "energy__"
# A tibble: 15,000 × 5
# Groups: .variable [10]
.chain .iteration .draw .variable .value
<int> <int> <int> <chr> <dbl>
1 1 1 1 b_Intercept 3.34
2 1 2 2 b_Intercept 3.30
3 1 3 3 b_Intercept 3.17
4 1 4 4 b_Intercept 3.13
5 1 5 5 b_Intercept 3.07
6 1 6 6 b_Intercept 3.27
7 1 7 7 b_Intercept 3.08
8 1 8 8 b_Intercept 2.86
9 1 9 9 b_Intercept 3.08
10 1 10 10 b_Intercept 3.07
# ℹ 14,990 more rows
## Halfeye / ridge displays of the brms coefficient posteriors (intercept
## excluded so the slopes are visible on a common scale).
loyn.brm3 |>
gather_draws(`^b_.*`, regex = TRUE) |>
filter(.variable != "b_Intercept") |>
ggplot() +
stat_halfeye(aes(x = .value, y = .variable)) +
facet_wrap(~.variable, scales = "free")loyn.brm3 |>
## Ratio-scale version: exponentiate first, dashed line at 1 = no effect.
gather_draws(`^b_.*`, regex = TRUE) |>
mutate(.value = exp(.value)) |>
filter(.variable != "b_Intercept") |>
ggplot() +
stat_halfeye(aes(x = .value, y = .variable)) +
geom_vline(xintercept = 1, linetype = "dashed") +
scale_x_continuous(trans = scales::log2_trans()) +
theme_classic()loyn.brm3 |>
## Ridge plot on the link scale, reference line at 0 (no effect).
gather_draws(`^b_.*`, regex = TRUE) |>
filter(.variable != "b_Intercept") |>
ggplot() +
geom_density_ridges(aes(x = .value, y = .variable), alpha = 0.4) +
geom_vline(xintercept = 0, linetype = "dashed")Picking joint bandwidth of 0.0221
## Or on a fractional scale
## NOTE(review): stat(x) is deprecated since ggplot2 3.4.0; after_stat(x) is
## the recommended replacement.
loyn.brm3 |>
gather_draws(`^b_.*`, regex = TRUE) |>
filter(.variable != "b_Intercept") |>
ggplot() +
geom_density_ridges_gradient(
aes(
x = exp(.value),
y = .variable,
fill = stat(x)
),
alpha = 0.4, colour = "white",
quantile_lines = TRUE,
quantiles = c(0.025, 0.975)
) +
geom_vline(xintercept = 1, linetype = "dashed") +
scale_x_continuous(trans = scales::log2_trans()) +
scale_fill_viridis_c(option = "C")Picking joint bandwidth of 0.0319
# A tibble: 1,500 × 13
.chain .iteration .draw b_Intercept b_scalelogDIST b_scalelogLDIST
<int> <int> <int> <dbl> <dbl> <dbl>
1 1 1 1 3.34 0.0149 0.0522
2 1 2 2 3.30 0.00367 -0.0606
3 1 3 3 3.17 -0.0803 0.124
4 1 4 4 3.13 0.0278 0.0564
5 1 5 5 3.07 0.0844 -0.00819
6 1 6 6 3.27 -0.0823 0.0615
7 1 7 7 3.08 -0.0335 0.0585
8 1 8 8 2.86 0.0160 -0.0661
9 1 9 9 3.08 -0.151 0.111
10 1 10 10 3.07 0.0115 0.0223
# ℹ 1,490 more rows
# ℹ 7 more variables: b_scalelogAREA <dbl>, b_fGRAZE2 <dbl>, b_fGRAZE3 <dbl>,
# b_fGRAZE4 <dbl>, b_fGRAZE5 <dbl>, b_scaleALT <dbl>, b_scaleYR.ISOL <dbl>
Warning: Method 'posterior_samples' is deprecated. Please see ?as_draws for
recommended alternatives.
# A tibble: 1,500 × 17
b_Intercept b_scalelogDIST b_scalelogLDIST b_scalelogAREA b_fGRAZE2 b_fGRAZE3
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 3.34 0.0149 0.0522 0.0635 0.0157 -0.313
2 3.30 0.00367 -0.0606 0.187 -0.362 -0.278
3 3.17 -0.0803 0.124 0.200 0.0791 0.0764
4 3.13 0.0278 0.0564 0.169 0.0978 0.0406
5 3.07 0.0844 -0.00819 0.218 0.0584 0.0683
6 3.27 -0.0823 0.0615 0.153 -0.0354 -0.0731
7 3.08 -0.0335 0.0585 0.193 0.0254 -0.0887
8 2.86 0.0160 -0.0661 0.304 0.249 0.266
9 3.08 -0.151 0.111 0.259 0.126 0.0746
10 3.07 0.0115 0.0223 0.263 0.159 0.109
# ℹ 1,490 more rows
# ℹ 11 more variables: b_fGRAZE4 <dbl>, b_fGRAZE5 <dbl>, b_scaleALT <dbl>,
# b_scaleYR.ISOL <dbl>, sigma <dbl>, Intercept <dbl>, prior_Intercept <dbl>,
# prior_b <dbl>, prior_sigma <dbl>, lprior <dbl>, lp__ <dbl>
Region of Practical Equivalence
For standardised parameter (negligible effect)
[1] 0.09024351
[1] -0.01 0.01
# Proportion of samples inside the ROPE [-0.09, 0.09]:
Parameter | inside ROPE
---------------------------
Intercept | 0.00 %
scalelogDIST | 93.89 %
scalelogLDIST | 78.65 %
scalelogAREA | 1.76 %
fGRAZE2 | 48.67 %
fGRAZE3 | 53.30 %
fGRAZE4 | 50.07 %
fGRAZE5 | 0.00 %
scaleALT | 98.81 %
scaleYR.ISOL | 84.83 %
10 Further analyses
## Candidate reduced models fitted by updating the full rstanarm model; the
## diagnostic_file is required later by bayesfactor_models()/bridge sampling.
loyn.rstanarm4a <- update(loyn.rstanarm3, . ~ scale(log(DIST)) * scale(log(LDIST)),
diagnostic_file = file.path(tempdir(), "dfa.csv")
)
loyn.rstanarm4b <- update(loyn.rstanarm3, . ~ scale(log(AREA)) * fGRAZE,
diagnostic_file = file.path(tempdir(), "dfb.csv")
)
loyn.rstanarm4c <- update(loyn.rstanarm3, . ~ scale(log(AREA)) * fGRAZE * scale(YR.ISOL),
diagnostic_file = file.path(tempdir(), "dfc.csv")
)
loyn.rstanarm4d <- update(loyn.rstanarm3, . ~ scale(ALT),
diagnostic_file = file.path(tempdir(), "dfd.csv")
)
## Null (intercept-only) reference model.
loyn.rstanarm4e <- update(loyn.rstanarm3, . ~ 1,
diagnostic_file = file.path(tempdir(), "dfe.csv")
)
## LOO-CV comparison: the better model appears first with elpd_diff = 0.
loo_compare(
loo(loyn.rstanarm4a),
loo(loyn.rstanarm4e)
) elpd_diff se_diff
loyn.rstanarm4e 0.0 0.0
loyn.rstanarm4a -2.4 1.5
Warning: Found 2 observation(s) with a pareto_k > 0.7. We recommend calling 'loo' again with argument 'k_threshold = 0.7' in order to calculate the ELPD without the assumption that these observations are negligible. This will refit the model 2 times to compute the ELPDs for the problematic observations directly.
elpd_diff se_diff
loyn.rstanarm4b 0.0 0.0
loyn.rstanarm4e -30.4 7.2
Warning: Found 8 observation(s) with a pareto_k > 0.7. We recommend calling 'loo' again with argument 'k_threshold = 0.7' in order to calculate the ELPD without the assumption that these observations are negligible. This will refit the model 8 times to compute the ELPDs for the problematic observations directly.
elpd_diff se_diff
loyn.rstanarm4c 0.0 0.0
loyn.rstanarm4e -22.0 6.8
elpd_diff se_diff
loyn.rstanarm4d 0.0 0.0
loyn.rstanarm4e -3.6 2.3
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Estimated Bayes factor in favor of x1 over x2: 0.00138
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Estimated Bayes factor in favor of x1 over x2: 1032891552.95408
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Estimated Bayes factor in favor of x1 over x2: 7541.08718
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Estimated Bayes factor in favor of x1 over x2: 5.03167
## Refit the brms candidate models; save_pars(all = TRUE) keeps all parameter
## draws so that bridge-sampling Bayes factors can be computed afterwards.
loyn.brm4a <- update(loyn.brm3, . ~ scale(log(DIST)) * scale(log(LDIST)),
save_pars = save_pars(all = TRUE), refresh = 0
)Start sampling
loyn.brm4b <- update(loyn.brm3, . ~ scale(log(AREA)) * fGRAZE,
save_pars = save_pars(all = TRUE), refresh = 0
)Start sampling
loyn.brm4c <- update(loyn.brm3, . ~ scale(log(AREA)) * fGRAZE * scale(YR.ISOL),
save_pars = save_pars(all = TRUE), refresh = 0
)Start sampling
Start sampling
The desired updates require recompiling the model
Compiling Stan program...
Start sampling
Warning:
2 (3.6%) p_waic estimates greater than 0.4. We recommend trying loo instead.
Computed from 1500 by 56 log-likelihood matrix.
Estimate SE
elpd_waic -217.0 4.4
p_waic 3.9 0.8
waic 433.9 8.9
2 (3.6%) p_waic estimates greater than 0.4. We recommend trying loo instead.
Computed from 1500 by 56 log-likelihood matrix.
Estimate SE
elpd_loo -217.1 4.5
p_loo 4.0 0.8
looic 434.2 8.9
------
MCSE of elpd_loo is 0.1.
MCSE and ESS estimates assume MCMC draws (r_eff in [0.8, 1.1]).
All Pareto k estimates are good (k < 0.69).
See help('pareto-k-diagnostic') for details.
Computed from 1500 by 56 log-likelihood matrix.
Estimate SE
elpd_loo -215.1 4.5
p_loo 1.2 0.2
looic 430.3 9.1
------
MCSE of elpd_loo is 0.0.
MCSE and ESS estimates assume MCMC draws (r_eff in [0.8, 1.0]).
All Pareto k estimates are good (k < 0.69).
See help('pareto-k-diagnostic') for details.
elpd_diff se_diff
loyn.brm4e 0.0 0.0
loyn.brm4a -2.0 1.2
Warning: Found 4 observations with a pareto_k > 0.69 in model 'loyn.brm4b'. We
recommend to run more iterations to get at least about 2200 posterior draws to
improve LOO-CV approximation accuracy.
elpd_diff se_diff
loyn.brm4b 0.0 0.0
loyn.brm4e -29.7 7.1
Warning: Found 2 observations with a pareto_k > 0.69 in model 'loyn.brm4b'. We
recommend to run more iterations to get at least about 2200 posterior draws to
improve LOO-CV approximation accuracy.
elpd_diff se_diff
loyn.brm4b 0.0 0.0
loyn.brm4e -29.4 7.1
elpd_diff se_diff
loyn.brm4c 0.0 0.0
loyn.brm4e -19.4 7.1
elpd_diff se_diff
loyn.brm4d 0.0 0.0
loyn.brm4e -3.5 2.0
An alternative is to compute Bayes factors based on bridge sampling. Note, this process usually requires far greater number of posterior samples (10x) in order to be stable. It is also advisable to run this multiple times to ensure stability.
It calculates the marginal likelihood of one model in favour of another. The larger the value, the more evidence there is of one model over the other.
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Estimated Bayes factor in favor of loyn.brm4a over loyn.brm4e: 0.00019
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Estimated Bayes factor in favor of loyn.brm4e over loyn.brm4a: 5191.63135
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Estimated Bayes factor in favor of loyn.brm4b over loyn.brm4e: 51442.85107
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Estimated Bayes factor in favor of loyn.brm4e over loyn.brm4b: 0.00002
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Estimated Bayes factor in favor of loyn.brm4c over loyn.brm4e: 0.00582
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Estimated Bayes factor in favor of loyn.brm4e over loyn.brm4c: 176.77261
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Estimated Bayes factor in favor of loyn.brm4d over loyn.brm4e: 1.72185
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Estimated Bayes factor in favor of loyn.brm4e over loyn.brm4d: 0.58180
Compare effect of grazing at different patch areas
## Pairwise grazing contrasts (back-transformed, i.e. abundance ratios) at
## the minimum, mean and maximum patch AREA, from the AREA x fGRAZE model.
loyn.list <- with(loyn, list(AREA = c(min(AREA), mean(AREA), max(AREA))))
loyn.brm4b |>
emmeans(~ fGRAZE | AREA, at = loyn.list, type = "response") |>
pairs(reverse = FALSE)Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
AREA = 0.1:
contrast ratio lower.HPD upper.HPD
fGRAZE1 / fGRAZE2 1.867 0.85546 3.351
fGRAZE1 / fGRAZE3 2.030 1.02795 3.672
fGRAZE1 / fGRAZE4 6.168 0.87360 26.707
fGRAZE1 / fGRAZE5 4.348 1.12820 9.676
fGRAZE2 / fGRAZE3 1.094 0.43183 2.088
fGRAZE2 / fGRAZE4 3.344 0.33782 14.274
fGRAZE2 / fGRAZE5 2.335 0.53289 5.446
fGRAZE3 / fGRAZE4 3.040 0.39667 13.101
fGRAZE3 / fGRAZE5 2.144 0.60860 5.145
fGRAZE4 / fGRAZE5 0.696 0.01849 2.465
AREA = 69.2696428571429:
contrast ratio lower.HPD upper.HPD
fGRAZE1 / fGRAZE2 0.909 0.69403 1.141
fGRAZE1 / fGRAZE3 0.865 0.64843 1.128
fGRAZE1 / fGRAZE4 0.572 0.25954 0.944
fGRAZE1 / fGRAZE5 1.643 0.72730 3.203
fGRAZE2 / fGRAZE3 0.952 0.64462 1.302
fGRAZE2 / fGRAZE4 0.625 0.28939 1.087
fGRAZE2 / fGRAZE5 1.808 0.74788 3.652
fGRAZE3 / fGRAZE4 0.656 0.26679 1.140
fGRAZE3 / fGRAZE5 1.870 0.78107 3.860
fGRAZE4 / fGRAZE5 2.963 0.95598 6.632
AREA = 1771:
contrast ratio lower.HPD upper.HPD
fGRAZE1 / fGRAZE2 0.638 0.33287 1.027
fGRAZE1 / fGRAZE3 0.563 0.28298 1.013
fGRAZE1 / fGRAZE4 0.177 0.01323 0.585
fGRAZE1 / fGRAZE5 1.018 0.12486 3.470
fGRAZE2 / fGRAZE3 0.884 0.31829 1.805
fGRAZE2 / fGRAZE4 0.280 0.01934 1.001
fGRAZE2 / fGRAZE5 1.624 0.17584 6.076
fGRAZE3 / fGRAZE4 0.302 0.00675 1.137
fGRAZE3 / fGRAZE5 1.771 0.21812 6.937
fGRAZE4 / fGRAZE5 5.974 0.27372 36.809
Point estimate displayed: median
Results are back-transformed from the log scale
HPD interval probability: 0.95
## Capture the same contrasts as a plain data frame for later plotting.
newdata <- loyn.brm4b |>
emmeans(~ fGRAZE | AREA, at = loyn.list, type = "response") |>
pairs() |>
as.data.frame()Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
contrast AREA ratio lower.HPD upper.HPD
fGRAZE1 / fGRAZE2 0.1 1.866643 0.8554560 3.351042
fGRAZE1 / fGRAZE3 0.1 2.029561 1.0279454 3.672192
fGRAZE1 / fGRAZE4 0.1 6.167821 0.8735980 26.707174
fGRAZE1 / fGRAZE5 0.1 4.348256 1.1281965 9.675951
fGRAZE2 / fGRAZE3 0.1 1.093714 0.4318253 2.088409
fGRAZE2 / fGRAZE4 0.1 3.343609 0.3378181 14.274354
Point estimate displayed: median
Results are back-transformed from the log scale
HPD interval probability: 0.95
## Full posterior of each contrast (one draw per row) rather than summaries.
newdata <- loyn.brm4b |>
emmeans(~ fGRAZE | AREA, at = loyn.list, type = "response") |>
pairs() |>
gather_emmeans_draws()Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
## Median + HDI point-ranges per contrast, one panel per AREA value.
## NOTE(review): gather_emmeans_draws() returns values on the link (log)
## scale, yet the reference line is drawn at 1 - for log-scale contrasts
## "no difference" is 0. Verify the intended scale.
newdata |>
median_hdci() |>
ggplot() +
geom_hline(yintercept = 1, linetype = "dashed") +
geom_pointrange(aes(y = .value, ymin = .lower, ymax = .upper, x = contrast)) +
facet_wrap(~AREA) +
coord_flip()Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
# A tibble: 22,500 × 6
# Groups: fGRAZE, AREA [15]
fGRAZE AREA .chain .iteration .draw .value
<fct> <dbl> <int> <int> <int> <dbl>
1 1 0.1 NA NA 1 2.89
2 1 0.1 NA NA 2 3.09
3 1 0.1 NA NA 3 3.01
4 1 0.1 NA NA 4 2.85
5 1 0.1 NA NA 5 2.97
6 1 0.1 NA NA 6 3.22
7 1 0.1 NA NA 7 3.12
8 1 0.1 NA NA 8 3.05
9 1 0.1 NA NA 9 3.19
10 1 0.1 NA NA 10 3.07
# ℹ 22,490 more rows
`summarise()` has grouped output by 'contrast'. You can override using the
`.groups` argument.
## Posterior slab plot of the contrasts, shaded by CDF interval (50/80/95%),
## one facet per (rounded) AREA value.
## NOTE(review): stat() is deprecated in ggplot2 >= 3.4.0; after_stat() is
## the recommended replacement.
g <- newdata |>
ggplot() +
geom_vline(xintercept = 1, linetype = "dashed") +
stat_slab(aes(
x = .value, y = contrast,
fill = stat(ggdist::cut_cdf_qi(cdf,
.width = c(0.5, 0.8, 0.95),
labels = scales::percent_format()
))
), color = "black") +
scale_fill_brewer("Interval", direction = -1, na.translate = FALSE) +
facet_grid(~ round(AREA, 1))11 Summary figure
## Prediction grid: 100 AREA values spanning the observed range, then the
## expected abundance per grazing level on the response scale.
loyn.list <- with(loyn, list(AREA = modelr::seq_range(AREA, n = 100)))
newdata <- emmeans(loyn.brm3, ~ AREA | fGRAZE, at = loyn.list, type = "response") |>
as.data.frame()Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
AREA fGRAZE response lower.HPD upper.HPD
0.10000 1 15.03861 9.582079 22.04046
17.98788 1 25.22851 21.141488 30.17126
35.87576 1 27.12200 23.213343 31.88078
53.76364 1 28.17709 23.982953 32.63987
71.65152 1 28.98566 24.909605 33.66697
89.53939 1 29.65230 25.457034 34.33955
Point estimate displayed: median
Results are back-transformed from the log scale
HPD interval probability: 0.95
## Ribbon plot: posterior median abundance vs AREA per grazing level with
## 95% HPD intervals; both axes log10-scaled.
## The second statement (fused onto the scale_y_log10() line by the
## extraction) pulls individual posterior draws of the fitted trend from
## loyn.rstanarm3 for a "spaghetti" plot; gather_emmeans_draws() returns
## .value on the link (log) scale, hence Fit = exp(.value).
ggplot(newdata, aes(y = response, x = AREA)) +
geom_ribbon(aes(ymin = lower.HPD, ymax = upper.HPD, fill = fGRAZE), alpha = 0.3) +
geom_line(aes(color = fGRAZE)) +
theme_bw() +
scale_x_log10() +
scale_y_log10()spaghetti <- emmeans(loyn.rstanarm3, ~ AREA | fGRAZE, at = loyn.list, type = "response") |>
gather_emmeans_draws() |>
mutate(Fit = exp(.value))
## Thin the posterior to a random subset of 100 draws so the spaghetti
## plot stays legible.
wch <- sample(seq_len(max(spaghetti$.draw)), size = 100, replace = FALSE)
spaghetti <- spaghetti |> filter(.draw %in% wch)
## Spaghetti plot: 100 individual posterior draws (faint lines, grouped by
## draw within grazing level) overlaid on the median trend, conveying the
## uncertainty in the fitted AREA effect. Both axes log10-scaled.
ggplot(newdata) +
geom_line(data = spaghetti, aes(
y = Fit, x = AREA, color = fGRAZE,
group = interaction(fGRAZE, .draw)
), alpha = 0.05) +
geom_line(aes(y = response, x = AREA, color = fGRAZE)) +
theme_bw() +
scale_x_log10() +
scale_y_log10()## or honouring the data range
## Prediction grid that honours the observed AREA range within each grazing
## level (100 points per group); the remaining covariates are held at their
## within-group means (computed per fGRAZE group because of group_by()).
loyn.nd <- loyn %>%
  group_by(fGRAZE) %>%
  tidyr::expand(
    AREA = modelr::seq_range(AREA, n = 100),
    DIST = mean(DIST),
    LDIST = mean(LDIST),
    ALT = mean(ALT),
    YR.ISOL = mean(YR.ISOL)
  )
## Posterior expected predictions on the data-honouring grid loyn.nd:
## median trend with 95% HDCI ribbon per grazing level, raw observations
## (ABUND vs AREA) overlaid as points, both axes log10-scaled.
## Note the statement fused onto the final line by the extraction: it
## rebuilds loyn.list with a plain seq() over the full AREA range for the
## next model's predictions.
loyn.rstanarm3 |>
epred_draws(newdata = loyn.nd, value = ".value") |>
median_hdci() |>
ggplot(aes(x = AREA, y = .value, colour = fGRAZE, fill = fGRAZE)) +
geom_ribbon(aes(ymin = .lower, ymax = .upper), colour = NA, alpha = 0.3) +
geom_line() +
geom_point(data = loyn, aes(y = ABUND, x = AREA)) +
scale_y_log10() +
scale_x_log10() +
theme_bw()loyn.list <- with(loyn, list(AREA = seq(min(AREA), max(AREA), len = 100)))
## AREA trend per grazing level for the rstanarm interaction model
## loyn.rstanarm4b, back-transformed to the response scale; head(newdata)
## prints the first rows (its output header is fused onto the code line).
newdata <- emmeans(loyn.rstanarm4b, ~ AREA | fGRAZE, at = loyn.list, type = "response") |>
as.data.frame()
head(newdata) AREA fGRAZE response lower.HPD upper.HPD
0.10000 1 19.03816 11.62715 26.90043
17.98788 1 26.16248 22.53196 29.96777
35.87576 1 27.28563 23.98798 30.61037
53.76364 1 27.95703 24.68977 31.09034
71.65152 1 28.48415 25.05349 31.40785
89.53939 1 28.88190 25.37502 31.83853
Point estimate displayed: median
Results are back-transformed from the log scale
HPD interval probability: 0.95
## Ribbon plot for the rstanarm interaction model: posterior median
## abundance vs AREA per grazing level with 95% HPD ribbons, both axes
## log10-scaled. The fused second statement extracts per-draw fitted
## trends for the spaghetti plot; .value is on the log link scale, hence
## Fit = exp(.value).
ggplot(newdata, aes(y = response, x = AREA)) +
geom_ribbon(aes(ymin = lower.HPD, ymax = upper.HPD, fill = fGRAZE), alpha = 0.3) +
geom_line(aes(color = fGRAZE)) +
theme_bw() +
scale_x_log10() +
scale_y_log10()spaghetti <- emmeans(loyn.rstanarm4b, ~ AREA | fGRAZE, at = loyn.list, type = "response") |>
gather_emmeans_draws() |>
mutate(Fit = exp(.value))
## Keep a random subset of 100 posterior draws for the spaghetti plot.
wch <- sample(seq_len(max(spaghetti$.draw)), size = 100, replace = FALSE)
spaghetti <- spaghetti %>% filter(.draw %in% wch)
## Spaghetti plot (rstanarm interaction model): 100 posterior draws as
## faint lines over the median trend, one line group per draw within each
## grazing level; log10 axes.
ggplot(newdata) +
geom_line(data = spaghetti, aes(
y = Fit, x = AREA, color = fGRAZE,
group = interaction(fGRAZE, .draw)
), alpha = 0.05) +
geom_line(aes(y = response, x = AREA, color = fGRAZE)) +
theme_bw() +
scale_x_log10() +
scale_y_log10()## or honouring the data range
## Per-grazing-level prediction grid restricted to the AREA range actually
## observed within each fGRAZE group (100 points per group).
loyn.nd <- loyn %>%
  group_by(fGRAZE) %>%
  tidyr::expand(AREA = modelr::seq_range(AREA, n = 100))
## Posterior expected predictions for loyn.rstanarm4b on the data-honouring
## grid: median trend + 95% HDCI ribbon per grazing level, raw ABUND points
## overlaid, log10 axes. The statement fused onto the final line rebuilds
## loyn.list (100-point AREA sequence) for the brms model that follows.
loyn.rstanarm4b |>
epred_draws(newdata = loyn.nd, value = ".value") |>
median_hdci() |>
ggplot(aes(x = AREA, y = .value, colour = fGRAZE, fill = fGRAZE)) +
geom_ribbon(aes(ymin = .lower, ymax = .upper), colour = NA, alpha = 0.3) +
geom_line() +
geom_point(data = loyn, aes(y = ABUND, x = AREA)) +
scale_y_log10() +
scale_x_log10() +
theme_bw()loyn.list <- with(loyn, list(AREA = modelr::seq_range(AREA, n = 100)))
## AREA trend per grazing level for the brms additive model loyn.brm3,
## back-transformed to the response scale (repeated here for this section).
newdata <- emmeans(loyn.brm3, ~ AREA | fGRAZE, at = loyn.list, type = "response") |>
as.data.frame()Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
AREA fGRAZE response lower.HPD upper.HPD
0.10000 1 15.03861 9.582079 22.04046
17.98788 1 25.22851 21.141488 30.17126
35.87576 1 27.12200 23.213343 31.88078
53.76364 1 28.17709 23.982953 32.63987
71.65152 1 28.98566 24.909605 33.66697
89.53939 1 29.65230 25.457034 34.33955
Point estimate displayed: median
Results are back-transformed from the log scale
HPD interval probability: 0.95
## Ribbon plot for loyn.brm3: posterior median abundance vs AREA per
## grazing level with 95% HPD ribbons, log10 axes. The fused statement
## then collects per-draw fitted trends for the spaghetti plot
## (Fit = exp(.value) back-transforms from the log link).
ggplot(newdata, aes(y = response, x = AREA)) +
geom_ribbon(aes(ymin = lower.HPD, ymax = upper.HPD, fill = fGRAZE), alpha = 0.3) +
geom_line(aes(color = fGRAZE)) +
theme_bw() +
scale_x_log10() +
scale_y_log10()spaghetti <- emmeans(loyn.brm3, ~ AREA | fGRAZE, at = loyn.list, type = "response") |>
gather_emmeans_draws() |>
mutate(Fit = exp(.value))Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
## Subsample 100 posterior draws at random for plotting.
wch <- sample(seq_len(max(spaghetti$.draw)), size = 100, replace = FALSE)
spaghetti <- spaghetti %>% filter(.draw %in% wch)
## Spaghetti plot for loyn.brm3: 100 posterior draws as faint lines
## (alpha = 0.1 here, slightly heavier than the rstanarm version) over the
## median trend; log10 axes.
ggplot(newdata) +
geom_line(data = spaghetti, aes(
y = Fit, x = AREA, color = fGRAZE,
group = interaction(fGRAZE, .draw)
), alpha = 0.1) +
geom_line(aes(y = response, x = AREA, color = fGRAZE)) +
theme_bw() +
scale_x_log10() +
scale_y_log10()# or honouring the data range
## Data-honouring prediction grid for the brms additive model: 100 AREA
## values within each grazing level's observed range, with the other
## covariates fixed at their per-group means (group_by() precedes expand()).
loyn.nd <- loyn %>%
  group_by(fGRAZE) %>%
  tidyr::expand(
    AREA = modelr::seq_range(AREA, n = 100),
    DIST = mean(DIST),
    LDIST = mean(LDIST),
    ALT = mean(ALT),
    YR.ISOL = mean(YR.ISOL)
  )
## Posterior expected predictions for loyn.brm3 on the data-honouring grid:
## median trend + 95% HDCI ribbon per grazing level, raw ABUND observations
## overlaid, both axes log10-scaled. (The sentence fused onto the final
## line is document prose introducing the next section, not code.)
loyn.brm3 |>
epred_draws(newdata = loyn.nd, value = ".value") |>
median_hdci() |>
ggplot(aes(x = AREA, y = .value, colour = fGRAZE, fill = fGRAZE)) +
geom_ribbon(aes(ymin = .lower, ymax = .upper), colour = NA, alpha = 0.3) +
geom_line() +
geom_point(data = loyn, aes(y = ABUND, x = AREA)) +
scale_y_log10() +
scale_x_log10() +
theme_bw()Lets explore the relationship between bird abundance and patch area for each grazing intensity separately.
## AREA trend per grazing level for the brms interaction model loyn.brm4b:
## 100 evenly spaced AREA values over the observed range, marginal means
## back-transformed to the response scale.
loyn.list <- with(loyn, list(AREA = modelr::seq_range(AREA, n = 100)))
newdata <- emmeans(loyn.brm4b, ~ AREA | fGRAZE, at = loyn.list, type = "response") |>
as.data.frame()Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
AREA fGRAZE response lower.HPD upper.HPD
0.10000 1 20.81781 13.72009 29.82664
17.98788 1 27.41168 23.65333 31.07247
35.87576 1 28.38843 25.13827 31.71887
53.76364 1 28.99924 26.05395 32.39693
71.65152 1 29.44425 26.43203 32.78452
89.53939 1 29.82336 26.63454 33.03599
Point estimate displayed: median
Results are back-transformed from the log scale
HPD interval probability: 0.95
## Ribbon plot for loyn.brm4b: posterior median abundance vs AREA per
## grazing level with 95% HPD ribbons, log10 axes. The fused statement
## collects per-draw fitted trends for the spaghetti plot
## (Fit = exp(.value) back-transforms from the log link).
ggplot(newdata, aes(y = response, x = AREA)) +
geom_ribbon(aes(ymin = lower.HPD, ymax = upper.HPD, fill = fGRAZE), alpha = 0.3) +
geom_line(aes(color = fGRAZE)) +
theme_bw() +
scale_x_log10() +
scale_y_log10()spaghetti <- emmeans(loyn.brm4b, ~ AREA | fGRAZE, at = loyn.list, type = "response") |>
gather_emmeans_draws() |>
mutate(Fit = exp(.value))Warning: Method 'parse_bf' is deprecated. Please use 'brmsterms' instead.
## Randomly select 100 posterior draws to display.
wch <- sample(seq_len(max(spaghetti$.draw)), size = 100, replace = FALSE)
spaghetti <- spaghetti %>% filter(.draw %in% wch)
## Spaghetti plot for loyn.brm4b: 100 posterior draws as faint lines
## (alpha = 0.1) over the median trend, grouped by draw within grazing
## level; log10 axes.
ggplot(newdata) +
geom_line(data = spaghetti, aes(
y = Fit, x = AREA, color = fGRAZE,
group = interaction(fGRAZE, .draw)
), alpha = 0.1) +
geom_line(aes(y = response, x = AREA, color = fGRAZE)) +
theme_bw() +
scale_x_log10() +
scale_y_log10()## or honouring the data range
## AREA-only prediction grid restricted to each grazing level's observed
## AREA range (100 points per fGRAZE group).
loyn.nd <- loyn %>%
  group_by(fGRAZE) %>%
  tidyr::expand(AREA = modelr::seq_range(AREA, n = 100))
## Final summary figure (brms interaction model): posterior expected
## predictions summarised to median + 95% HDCI per grazing level, ribbons
## and trend lines with the raw ABUND observations overlaid; both axes
## log10-scaled.
loyn.brm4b %>%
  epred_draws(newdata = loyn.nd, value = ".value") %>%
  median_hdci() %>%
  ggplot(aes(y = .value, x = AREA, fill = fGRAZE, colour = fGRAZE)) +
  geom_ribbon(aes(ymin = .lower, ymax = .upper), alpha = 0.3, colour = NA) +
  geom_line() +
  geom_point(data = loyn, aes(x = AREA, y = ABUND)) +
  scale_x_log10() +
  scale_y_log10() +
  theme_bw()